package main import ( "flag" "fmt" "log/slog" "math" "os" "time" "github.com/go-audio/audio" "github.com/go-audio/wav" ) // Command-line flags var ( wavFile = flag.String("wav", "", "Path to mono 8kHz WAV file") minAms = flag.Int("minA", 1000, "Minimum Tone A duration (ms)") minBms = flag.Int("minB", 3000, "Minimum Tone B duration (ms)") gapMaxMs = flag.Int("gap", 5000, "Max gap between A and B (ms)") winMs = flag.Int("win", 100, "Window size (ms)") hopMs = flag.Int("hop", 50, "Hop size (ms)") ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection") rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal") verbose = flag.Bool("verbose", false, "Enable debug logging") ) // Goertzel struct for frequency detection type goertzel struct { N int fs float64 k int coeff float64 } func newGoertzel(targetHz float64, fs float64, N int) *goertzel { g := &goertzel{N: N, fs: fs} g.k = int(0.5 + (float64(g.N)*targetHz)/fs) omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) g.coeff = 2.0 * math.Cos(omega) return g } func (g *goertzel) Power(x []float64) float64 { var s0, s1, s2 float64 for i := 0; i < g.N; i++ { s0 = x[i] + g.coeff*s1 - s2 s2 = s1 s1 = s0 } omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) real := s1 - s2*math.Cos(omega) imag := s2 * math.Sin(omega) return real*real + imag*imag } func windowHann(x []float64) { n := float64(len(x)) for i := range x { x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0))) } } func pcmToFloat(buf []int16, N int) []float64 { out := make([]float64, N) for i := 0; i < N && i < len(buf); i++ { out[i] = float64(buf[i]) } return out } func rmsPCM(buf []int16) float64 { var s float64 for _, v := range buf { f := float64(v) s += f * f } if len(buf) == 0 { return 0 } return math.Sqrt(s / float64(len(buf))) } // twoToneDetector for detecting tone sequences type twoToneDetector struct { fs int winN int hopN int ratioThresh float64 rmsThresh float64 minAms int minBms int gapMaxMs int freqs []float64 gzBank []*goertzel inA bool aFreq float64 aAccumMs int aStart time.Time aEnd time.Time waitingB bool bFreq float64 bAccumMs int bStart time.Time bEnd time.Time gapRemainMs int logger *slog.Logger } func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int, logger *slog.Logger) *twoToneDetector { freqs := make([]float64, 0) for f := 300.0; f <= 3000.0; f += 10.0 { freqs = append(freqs, f) } gzBank := make([]*goertzel, len(freqs)) for i, f := range freqs { gzBank[i] = newGoertzel(f, float64(fs), winN) } return &twoToneDetector{ fs: fs, winN: winN, hopN: hopN, ratioThresh: ratioThresh, rmsThresh: rmsThresh, minAms: minAms, minBms: minBms, gapMaxMs: gapMaxMs, freqs: freqs, gzBank: gzBank, logger: logger, } } func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64, timestamp time.Time) { xi := pcmToFloat(pcms, d.winN) windowHann(xi) var total float64 for _, v := range xi { total += v * v } r := rmsPCM(pcms) hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs))) now := t0 if r < d.rmsThresh { d.logger.Debug("RMS below threshold, resetting", "time", now.Format(time.RFC3339), "rms", fmt.Sprintf("%.2f", r), "threshold", d.rmsThresh) d.reset() return "", 0, 0, 0, 0, time.Time{} } // Find frequency with highest power bestIdx := -1 bestPow := 0.0 for i, gz := range d.gzBank { p := gz.Power(xi) if p > bestPow { bestPow = p bestIdx = i } } ratio := bestPow / (total + 1e-12) if ratio < d.ratioThresh { d.logger.Debug("Ratio below threshold, resetting", "time", now.Format(time.RFC3339), "ratio", fmt.Sprintf("%.3f", ratio), "threshold", d.ratioThresh) d.reset() return "", 0, 0, 0, 0, time.Time{} } freq := d.freqs[bestIdx] if !d.inA && !d.waitingB { // Looking for Tone A d.inA = true d.aFreq = freq d.aStart = now } else if d.inA && !d.waitingB { // Confirming Tone A if math.Abs(freq-d.aFreq) <= 10.0 { d.aAccumMs += int(hopDur.Milliseconds()) d.aEnd = now.Add(hopDur) if d.aAccumMs >= d.minAms { d.inA = false d.waitingB = true d.gapRemainMs = d.gapMaxMs } } else { d.logger.Debug("Frequency differs from Tone A, resetting", "time", now.Format(time.RFC3339), "freq", fmt.Sprintf("%.1f", freq), "tone_a_freq", fmt.Sprintf("%.1f", d.aFreq)) d.reset() } } else if d.waitingB { d.gapRemainMs -= int(hopDur.Milliseconds()) if d.gapRemainMs <= 0 { d.logger.Debug("Gap exceeded max duration, resetting", "time", now.Format(time.RFC3339), "gap_max_ms", d.gapMaxMs) d.reset() } else if math.Abs(freq-d.aFreq) > 10.0 { // Check for Tone B if d.bAccumMs == 0 { d.bFreq = freq d.bStart = now } else if math.Abs(freq-d.bFreq) > 10.0 { d.logger.Debug("Frequency differs from Tone B, resetting B", "time", now.Format(time.RFC3339), "freq", fmt.Sprintf("%.1f", freq), "tone_b_freq", fmt.Sprintf("%.1f", d.bFreq)) d.bFreq = freq d.bAccumMs = 0 d.bStart = now } d.bAccumMs += int(hopDur.Milliseconds()) d.bEnd = now.Add(hopDur) if d.bAccumMs >= d.minBms { event = "TWO_TONE_DETECTED" aDurMs := float64(d.aEnd.Sub(d.aStart).Milliseconds()) bDurMs := float64(d.bEnd.Sub(d.bStart).Milliseconds()) d.logger.Info("Two-tone detected", "time", now.Format(time.RFC3339), "tone_a_freq", fmt.Sprintf("%.1f", d.aFreq), "tone_a_duration_ms", fmt.Sprintf("%.0f", aDurMs), "tone_b_freq", fmt.Sprintf("%.1f", d.bFreq), "tone_b_duration_ms", fmt.Sprintf("%.0f", bDurMs)) return event, d.aFreq, aDurMs, d.bFreq, bDurMs, now } } } return "", 0, 0, 0, 0, time.Time{} } func (d *twoToneDetector) reset() { d.inA = false d.aFreq = 0 d.aAccumMs = 0 d.aStart = time.Time{} d.aEnd = time.Time{} d.waitingB = false d.bFreq = 0 d.bAccumMs = 0 d.bStart = time.Time{} d.bEnd = time.Time{} d.gapRemainMs = 0 } func main() { flag.Parse() // Initialize slog logger logLevel := &slog.LevelVar{} logLevel.Set(slog.LevelInfo) if *verbose { logLevel.Set(slog.LevelDebug) } logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{ Level: logLevel, })) if *wavFile == "" { logger.Error("WAV file path is required", "flag", "-wav") os.Exit(1) } file, err := os.Open(*wavFile) if err != nil { logger.Error("Failed to open WAV file", "error", err) os.Exit(1) } defer file.Close() decoder := wav.NewDecoder(file) if !decoder.IsValidFile() { logger.Error("Invalid WAV file") os.Exit(1) } if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 { logger.Error("WAV file must be mono 8kHz", "sample_rate", decoder.Format().SampleRate, "channels", decoder.Format().NumChannels) os.Exit(1) } const fs = 8000 winN := int(float64(fs) * float64(*winMs) / 1000.0) hopN := int(float64(fs) * float64(*hopMs) / 1000.0) if winN <= 0 || hopN <= 0 || hopN > winN { logger.Error("Invalid window/hop parameters", "winN", winN, "hopN", hopN) os.Exit(1) } det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs, logger) buf := &audio.IntBuffer{ Format: &audio.Format{SampleRate: fs, NumChannels: 1}, Data: make([]int, 8192), SourceBitDepth: 16, } sampleCount := 0 startTime := time.Now() logger.Info("Processing WAV file") for { n, err := decoder.PCMBuffer(buf) if err != nil || n == 0 || len(buf.Data) == 0 { logger.Info("Finished processing", "samples", sampleCount, "duration_sec", fmt.Sprintf("%.2f", float64(sampleCount)/float64(fs))) break } pcm := make([]int16, n) for i, v := range buf.Data[:n] { pcm[i] = int16(v) } sampleCount += n for offset := 0; offset <= len(pcm)-winN; offset += hopN { win := pcm[offset:min(offset+winN, len(pcm))] t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs)) event, aFreq, aDur, bFreq, bDur, timestamp := det.stepWindow(win, t) if event != "" { fmt.Printf("Detected two-tone sequence at %s:\n", timestamp.Format(time.RFC3339)) fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur) fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur) det.reset() } } } } func min(a, b int) int { if a < b { return a } return b }