// -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3 package main import ( "flag" "fmt" "log" "math" "os" "time" "github.com/go-audio/audio" "github.com/go-audio/wav" ) // Command-line flags var ( wavFile = flag.String("wav", "", "Path to mono 8kHz WAV file") minAms = flag.Int("minA", 1000, "Minimum Tone A duration (ms)") minBms = flag.Int("minB", 3000, "Minimum Tone B duration (ms)") gapMaxMs = flag.Int("gap", 5000, "Max gap between A and B (ms)") winMs = flag.Int("win", 100, "Window size (ms)") hopMs = flag.Int("hop", 50, "Hop size (ms)") ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection") rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal") ) // Goertzel struct for frequency detection type goertzel struct { N int fs float64 k int coeff float64 } func newGoertzel(targetHz float64, fs float64, N int) *goertzel { g := &goertzel{N: N, fs: fs} g.k = int(0.5 + (float64(g.N)*targetHz)/fs) omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) g.coeff = 2.0 * math.Cos(omega) return g } func (g *goertzel) Power(x []float64) float64 { var s0, s1, s2 float64 for i := 0; i < g.N; i++ { s0 = x[i] + g.coeff*s1 - s2 s2 = s1 s1 = s0 } omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) real := s1 - s2*math.Cos(omega) imag := s2 * math.Sin(omega) return real*real + imag*imag } func windowHann(x []float64) { n := float64(len(x)) for i := range x { x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0))) } } func pcmToFloat(buf []int16, N int) []float64 { out := make([]float64, N) for i := 0; i < N && i < len(buf); i++ { out[i] = float64(buf[i]) } return out } func rmsPCM(buf []int16) float64 { var s float64 for _, v := range buf { f := float64(v) s += f * f } if len(buf) == 0 { return 0 } return math.Sqrt(s / float64(len(buf))) } // twoToneDetector for detecting tone sequences type twoToneDetector struct { fs int winN int hopN int ratioThresh float64 rmsThresh float64 minAms int minBms int gapMaxMs int freqs []float64 gzBank []*goertzel inA bool aFreq float64 aAccumMs int aStart time.Time waitingB bool bFreq float64 bAccumMs int bStart time.Time gapRemainMs int } func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector { // Frequency range: 300–3000 Hz, 10 Hz steps freqs := make([]float64, 0) for f := 300.0; f <= 3000.0; f += 10.0 { freqs = append(freqs, f) } gzBank := make([]*goertzel, len(freqs)) for i, f := range freqs { gzBank[i] = newGoertzel(f, float64(fs), winN) } return &twoToneDetector{ fs: fs, winN: winN, hopN: hopN, ratioThresh: ratioThresh, rmsThresh: rmsThresh, minAms: minAms, minBms: minBms, gapMaxMs: gapMaxMs, freqs: freqs, gzBank: gzBank, } } func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) { xi := pcmToFloat(pcms, d.winN) windowHann(xi) var total float64 for _, v := range xi { total += v * v } r := rmsPCM(pcms) if r < d.rmsThresh { d.reset() return "", 0, 0, 0, 0 } // Find frequency with highest power bestIdx := -1 bestPow := 0.0 for i, gz := range d.gzBank { p := gz.Power(xi) if p > bestPow { bestPow = p bestIdx = i } } ratio := bestPow / (total + 1e-12) if ratio < d.ratioThresh { d.reset() return "", 0, 0, 0, 0 } freq := d.freqs[bestIdx] hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs))) now := t0 if !d.inA && !d.waitingB { // Looking for Tone A d.inA = true d.aFreq = freq d.aAccumMs = int(hopDur.Milliseconds()) d.aStart = now } else if d.inA && !d.waitingB { // Confirming Tone A if math.Abs(freq-d.aFreq) <= 10.0 { d.aAccumMs += int(hopDur.Milliseconds()) if d.aAccumMs >= d.minAms { d.inA = false d.waitingB = true d.gapRemainMs = d.gapMaxMs } } else { d.reset() } } else if d.waitingB { d.gapRemainMs -= int(hopDur.Milliseconds()) if d.gapRemainMs <= 0 { d.reset() } else if math.Abs(freq-d.aFreq) > 10.0 { // Check for Tone B (different frequency) if d.bAccumMs == 0 { d.bFreq = freq d.bStart = now } else if math.Abs(freq-d.bFreq) > 10.0 { // Switched to a different frequency, reset B d.bFreq = freq d.bAccumMs = 0 d.bStart = now } d.bAccumMs += int(hopDur.Milliseconds()) if d.bAccumMs >= d.minBms { event = "TWO_TONE_DETECTED" return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs) } } } return "", 0, 0, 0, 0 } func (d *twoToneDetector) reset() { d.inA = false d.aFreq = 0 d.aAccumMs = 0 d.aStart = time.Time{} d.waitingB = false d.bFreq = 0 d.bAccumMs = 0 d.bStart = time.Time{} d.gapRemainMs = 0 } func main() { flag.Parse() if *wavFile == "" { log.Fatal("WAV file path is required (use -wav flag)") } file, err := os.Open(*wavFile) if err != nil { log.Fatalf("Failed to open WAV file: %v", err) } defer file.Close() decoder := wav.NewDecoder(file) if !decoder.IsValidFile() { log.Fatal("Invalid WAV file") } if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 { log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels", decoder.Format().SampleRate, decoder.Format().NumChannels) } const fs = 8000 winN := int(float64(fs) * float64(*winMs) / 1000.0) hopN := int(float64(fs) * float64(*hopMs) / 1000.0) if winN <= 0 || hopN <= 0 || hopN > winN { log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN) } det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs) buf := &audio.IntBuffer{ Format: &audio.Format{SampleRate: fs, NumChannels: 1}, Data: make([]int, 8192), SourceBitDepth: 16, } sampleCount := 0 startTime := time.Now() log.Println("Processing WAV file...") for { n, err := decoder.PCMBuffer(buf) if err != nil || n == 0 || len(buf.Data) == 0 { log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs)) break } pcm := make([]int16, n) for i, v := range buf.Data[:n] { pcm[i] = int16(v) } sampleCount += n for offset := 0; offset <= len(pcm)-winN; offset += hopN { win := pcm[offset:min(offset+winN, len(pcm))] t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs)) event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t) if event != "" { fmt.Printf("Detected two-tone sequence:\n") fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur) fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur) det.reset() } } } } func min(a, b int) int { if a < b { return a } return b }