From b670654a4b9ad3b67c78cd054506ebeb9bd12856 Mon Sep 17 00:00:00 2001 From: Alex Savin Date: Fri, 15 Aug 2025 18:44:10 -0400 Subject: [PATCH] Refactor main.go for improved readability and add detailed comments for functions --- main.go | 534 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 298 insertions(+), 236 deletions(-) diff --git a/main.go b/main.go index ce4320b..57b3d78 100644 --- a/main.go +++ b/main.go @@ -2,291 +2,353 @@ package main import ( - "flag" - "fmt" - "log" - "math" - "os" - "time" + "flag" + "fmt" + "log" + "math" + "os" + "time" - "github.com/go-audio/audio" - "github.com/go-audio/wav" + "github.com/go-audio/audio" + "github.com/go-audio/wav" ) // Command-line flags var ( - wavFile = flag.String("wav", "", "Path to mono 8kHz WAV file") - minAms = flag.Int("minA", 1000, "Minimum Tone A duration (ms)") - minBms = flag.Int("minB", 3000, "Minimum Tone B duration (ms)") - gapMaxMs = flag.Int("gap", 5000, "Max gap between A and B (ms)") - winMs = flag.Int("win", 100, "Window size (ms)") - hopMs = flag.Int("hop", 50, "Hop size (ms)") - ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection") - rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal") + wavFile = flag.String("wav", "", "Path to mono 8kHz WAV file") + minAms = flag.Int("minA", 1000, "Minimum Tone A duration (ms)") + minBms = flag.Int("minB", 3000, "Minimum Tone B duration (ms)") + gapMaxMs = flag.Int("gap", 5000, "Max gap between A and B (ms)") + winMs = flag.Int("win", 100, "Window size (ms)") + hopMs = flag.Int("hop", 50, "Hop size (ms)") + ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection") + rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal") ) // Goertzel struct for frequency detection type goertzel struct { - N int - fs float64 - k int - coeff float64 + N int + fs float64 + k int + coeff float64 } +// newGoertzel initializes and returns a new instance of the 
Goertzel algorithm for detecting a specific target frequency. +// Parameters: +// +// targetHz - the target frequency in Hertz to detect. +// fs - the sampling rate in Hertz. +// N - the number of samples to process. +// +// Returns: +// +// A pointer to a goertzel struct configured for the specified frequency and sample rate. func newGoertzel(targetHz float64, fs float64, N int) *goertzel { - g := &goertzel{N: N, fs: fs} - g.k = int(0.5 + (float64(g.N)*targetHz)/fs) - omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) - g.coeff = 2.0 * math.Cos(omega) - return g + g := &goertzel{N: N, fs: fs} + g.k = int(0.5 + (float64(g.N)*targetHz)/fs) + omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) + g.coeff = 2.0 * math.Cos(omega) + return g } +// Power computes the power of the target frequency in the input signal x using the Goertzel algorithm. +// It processes the input slice x of length g.N and returns the squared magnitude of the frequency component +// specified by g.k. The function is typically used for efficient detection of specific frequencies in a signal. func (g *goertzel) Power(x []float64) float64 { - var s0, s1, s2 float64 - for i := 0; i < g.N; i++ { - s0 = x[i] + g.coeff*s1 - s2 - s2 = s1 - s1 = s0 - } - omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) - real := s1 - s2*math.Cos(omega) - imag := s2 * math.Sin(omega) - return real*real + imag*imag + var s0, s1, s2 float64 + for i := 0; i < g.N; i++ { + s0 = x[i] + g.coeff*s1 - s2 + s2 = s1 + s1 = s0 + } + omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N) + real := s1 - s2*math.Cos(omega) + imag := s2 * math.Sin(omega) + return real*real + imag*imag } +// windowHann applies a Hann window to the input slice x in-place. +// The Hann window is commonly used in signal processing to reduce spectral leakage +// by tapering the beginning and end of the signal to zero. +// The function modifies the input slice directly. 
func windowHann(x []float64) { - n := float64(len(x)) - for i := range x { - x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0))) - } + n := float64(len(x)) + for i := range x { + x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0))) + } } +// pcmToFloat converts a slice of 16-bit PCM audio samples to a slice of float64 values. +// It always returns a slice of length N holding up to N converted samples from the input buffer. +// If the input buffer has fewer than N samples, only the available samples are converted and the remaining entries stay zero. func pcmToFloat(buf []int16, N int) []float64 { - out := make([]float64, N) - for i := 0; i < N && i < len(buf); i++ { - out[i] = float64(buf[i]) - } - return out + out := make([]float64, N) + for i := 0; i < N && i < len(buf); i++ { + out[i] = float64(buf[i]) + } + return out } +// rmsPCM calculates the root mean square (RMS) value of a slice of 16-bit PCM audio samples. +// It returns the RMS as a float64, which is a measure of the signal's amplitude. +// If the input slice is empty, it returns 0. 
func rmsPCM(buf []int16) float64 { - var s float64 - for _, v := range buf { - f := float64(v) - s += f * f - } - if len(buf) == 0 { - return 0 - } - return math.Sqrt(s / float64(len(buf))) + var s float64 + for _, v := range buf { + f := float64(v) + s += f * f + } + if len(buf) == 0 { + return 0 + } + return math.Sqrt(s / float64(len(buf))) } // twoToneDetector for detecting tone sequences type twoToneDetector struct { - fs int - winN int - hopN int - ratioThresh float64 - rmsThresh float64 - minAms int - minBms int - gapMaxMs int - freqs []float64 - gzBank []*goertzel - inA bool - aFreq float64 - aAccumMs int - aStart time.Time - waitingB bool - bFreq float64 - bAccumMs int - bStart time.Time - gapRemainMs int + fs int + winN int + hopN int + ratioThresh float64 + rmsThresh float64 + minAms int + minBms int + gapMaxMs int + freqs []float64 + gzBank []*goertzel + inA bool + aFreq float64 + aAccumMs int + aStart time.Time + waitingB bool + bFreq float64 + bAccumMs int + bStart time.Time + gapRemainMs int } +// newTwoToneDetector creates and initializes a twoToneDetector instance with the specified parameters. +// It sets up a bank of Goertzel filters for detecting tones in the frequency range 300–3000 Hz (in 10 Hz steps). +// +// Parameters: +// +// fs - Sample rate in Hz. +// winN - Window size (number of samples per analysis window). +// hopN - Hop size (number of samples to advance per analysis). +// ratioThresh - Threshold for the ratio used in tone detection. +// rmsThresh - RMS threshold for signal energy. +// minAms - Minimum duration of Tone A in milliseconds. +// minBms - Minimum duration of Tone B in milliseconds. +// gapMaxMs - Maximum allowed gap between Tone A and Tone B in milliseconds. +// +// Returns: +// +// Pointer to an initialized twoToneDetector. 
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector { - // Frequency range: 300–3000 Hz, 10 Hz steps - freqs := make([]float64, 0) - for f := 300.0; f <= 3000.0; f += 10.0 { - freqs = append(freqs, f) - } - gzBank := make([]*goertzel, len(freqs)) - for i, f := range freqs { - gzBank[i] = newGoertzel(f, float64(fs), winN) - } - return &twoToneDetector{ - fs: fs, - winN: winN, - hopN: hopN, - ratioThresh: ratioThresh, - rmsThresh: rmsThresh, - minAms: minAms, - minBms: minBms, - gapMaxMs: gapMaxMs, - freqs: freqs, - gzBank: gzBank, - } + // Frequency range: 300–3000 Hz, 10 Hz steps + freqs := make([]float64, 0) + for f := 300.0; f <= 3000.0; f += 10.0 { + freqs = append(freqs, f) + } + gzBank := make([]*goertzel, len(freqs)) + for i, f := range freqs { + gzBank[i] = newGoertzel(f, float64(fs), winN) + } + return &twoToneDetector{ + fs: fs, + winN: winN, + hopN: hopN, + ratioThresh: ratioThresh, + rmsThresh: rmsThresh, + minAms: minAms, + minBms: minBms, + gapMaxMs: gapMaxMs, + freqs: freqs, + gzBank: gzBank, + } } +// stepWindow processes a window of PCM audio samples to detect a two-tone event. +// It applies a Hann window, computes the RMS, and searches for the strongest frequency. +// The function tracks the presence and duration of two distinct tones (A and B) separated by a gap. +// If both tones are detected with sufficient duration and within specified thresholds, it returns +// an event string ("TWO_TONE_DETECTED") along with the frequencies and durations of tones A and B. +// Otherwise it returns zero values; the detector state is reset only when the window fails the RMS or power-ratio threshold, Tone A changes frequency before reaching its minimum duration, or the time budget after Tone A (gapMaxMs) runs out. +// +// Parameters: +// +// pcms []int16 - Slice of PCM audio samples for the current window. +// t0 time.Time - Timestamp corresponding to the start of the window. +// +// Returns: +// +// event string - Event name if two-tone detected, otherwise empty string. +// aFreq float64 - Frequency of tone A (Hz). 
+// aDur float64 - Duration of tone A (milliseconds). +// bFreq float64 - Frequency of tone B (Hz). +// bDur float64 - Duration of tone B (milliseconds). func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) { - xi := pcmToFloat(pcms, d.winN) - windowHann(xi) + xi := pcmToFloat(pcms, d.winN) + windowHann(xi) - var total float64 - for _, v := range xi { - total += v * v - } + var total float64 + for _, v := range xi { + total += v * v + } - r := rmsPCM(pcms) - if r < d.rmsThresh { - d.reset() + r := rmsPCM(pcms) + if r < d.rmsThresh { + d.reset() + return "", 0, 0, 0, 0 + } + + // Find frequency with highest power + bestIdx := -1 + bestPow := 0.0 + for i, gz := range d.gzBank { + p := gz.Power(xi) + if p > bestPow { + bestPow = p + bestIdx = i + } + } + ratio := bestPow / (total + 1e-12) + if ratio < d.ratioThresh { + d.reset() + return "", 0, 0, 0, 0 + } + freq := d.freqs[bestIdx] + + hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs))) + now := t0 + + if !d.inA && !d.waitingB { + // Looking for Tone A + d.inA = true + d.aFreq = freq + d.aAccumMs = int(hopDur.Milliseconds()) + d.aStart = now + } else if d.inA && !d.waitingB { + // Confirming Tone A + if math.Abs(freq-d.aFreq) <= 10.0 { + d.aAccumMs += int(hopDur.Milliseconds()) + if d.aAccumMs >= d.minAms { + d.inA = false + d.waitingB = true + d.gapRemainMs = d.gapMaxMs + } + } else { + d.reset() + } + } else if d.waitingB { + d.gapRemainMs -= int(hopDur.Milliseconds()) + if d.gapRemainMs <= 0 { + d.reset() + } else if math.Abs(freq-d.aFreq) > 10.0 { + // Check for Tone B (different frequency) + if d.bAccumMs == 0 { + d.bFreq = freq + d.bStart = now + } else if math.Abs(freq-d.bFreq) > 10.0 { + // Switched to a different frequency, reset B + d.bFreq = freq + d.bAccumMs = 0 + d.bStart = now + } + d.bAccumMs += int(hopDur.Milliseconds()) + if d.bAccumMs >= d.minBms { + event = "TWO_TONE_DETECTED" + return event, d.aFreq, 
float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs) + } + } + } return "", 0, 0, 0, 0 - } - - // Find frequency with highest power - bestIdx := -1 - bestPow := 0.0 - for i, gz := range d.gzBank { - p := gz.Power(xi) - if p > bestPow { - bestPow = p - bestIdx = i - } - } - ratio := bestPow / (total + 1e-12) - if ratio < d.ratioThresh { - d.reset() - return "", 0, 0, 0, 0 - } - freq := d.freqs[bestIdx] - - hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs))) - now := t0 - - if !d.inA && !d.waitingB { - // Looking for Tone A - d.inA = true - d.aFreq = freq - d.aAccumMs = int(hopDur.Milliseconds()) - d.aStart = now - } else if d.inA && !d.waitingB { - // Confirming Tone A - if math.Abs(freq-d.aFreq) <= 10.0 { - d.aAccumMs += int(hopDur.Milliseconds()) - if d.aAccumMs >= d.minAms { - d.inA = false - d.waitingB = true - d.gapRemainMs = d.gapMaxMs - } - } else { - d.reset() - } - } else if d.waitingB { - d.gapRemainMs -= int(hopDur.Milliseconds()) - if d.gapRemainMs <= 0 { - d.reset() - } else if math.Abs(freq-d.aFreq) > 10.0 { - // Check for Tone B (different frequency) - if d.bAccumMs == 0 { - d.bFreq = freq - d.bStart = now - } else if math.Abs(freq-d.bFreq) > 10.0 { - // Switched to a different frequency, reset B - d.bFreq = freq - d.bAccumMs = 0 - d.bStart = now - } - d.bAccumMs += int(hopDur.Milliseconds()) - if d.bAccumMs >= d.minBms { - event = "TWO_TONE_DETECTED" - return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs) - } - } - } - return "", 0, 0, 0, 0 } +// reset reinitializes all internal state fields of the twoToneDetector to their default values. +// This includes clearing detection flags, frequencies, accumulated durations, start times, and gap timers. 
func (d *twoToneDetector) reset() { - d.inA = false - d.aFreq = 0 - d.aAccumMs = 0 - d.aStart = time.Time{} - d.waitingB = false - d.bFreq = 0 - d.bAccumMs = 0 - d.bStart = time.Time{} - d.gapRemainMs = 0 + d.inA = false + d.aFreq = 0 + d.aAccumMs = 0 + d.aStart = time.Time{} + d.waitingB = false + d.bFreq = 0 + d.bAccumMs = 0 + d.bStart = time.Time{} + d.gapRemainMs = 0 } func main() { - flag.Parse() - if *wavFile == "" { - log.Fatal("WAV file path is required (use -wav flag)") - } - - file, err := os.Open(*wavFile) - if err != nil { - log.Fatalf("Failed to open WAV file: %v", err) - } - defer file.Close() - - decoder := wav.NewDecoder(file) - if !decoder.IsValidFile() { - log.Fatal("Invalid WAV file") - } - if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 { - log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels", - decoder.Format().SampleRate, decoder.Format().NumChannels) - } - - const fs = 8000 - winN := int(float64(fs) * float64(*winMs) / 1000.0) - hopN := int(float64(fs) * float64(*hopMs) / 1000.0) - if winN <= 0 || hopN <= 0 || hopN > winN { - log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN) - } - - det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs) - - buf := &audio.IntBuffer{ - Format: &audio.Format{SampleRate: fs, NumChannels: 1}, - Data: make([]int, 8192), - SourceBitDepth: 16, - } - sampleCount := 0 - startTime := time.Now() - - log.Println("Processing WAV file...") - for { - n, err := decoder.PCMBuffer(buf) - if err != nil || n == 0 || len(buf.Data) == 0 { - log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs)) - break + flag.Parse() + if *wavFile == "" { + log.Fatal("WAV file path is required (use -wav flag)") } - pcm := make([]int16, n) - for i, v := range buf.Data[:n] { - pcm[i] = int16(v) + file, err := os.Open(*wavFile) + if err != nil { + log.Fatalf("Failed to open WAV file: %v", err) } - 
sampleCount += n + defer file.Close() - for offset := 0; offset <= len(pcm)-winN; offset += hopN { - win := pcm[offset:min(offset+winN, len(pcm))] - t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs)) - event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t) - if event != "" { - fmt.Printf("Detected two-tone sequence:\n") - fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur) - fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur) - det.reset() - } + decoder := wav.NewDecoder(file) + if !decoder.IsValidFile() { + log.Fatal("Invalid WAV file") + } + if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 { + log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels", + decoder.Format().SampleRate, decoder.Format().NumChannels) + } + + const fs = 8000 + winN := int(float64(fs) * float64(*winMs) / 1000.0) + hopN := int(float64(fs) * float64(*hopMs) / 1000.0) + if winN <= 0 || hopN <= 0 || hopN > winN { + log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN) + } + + det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs) + + buf := &audio.IntBuffer{ + Format: &audio.Format{SampleRate: fs, NumChannels: 1}, + Data: make([]int, 8192), + SourceBitDepth: 16, + } + sampleCount := 0 + startTime := time.Now() + + log.Println("Processing WAV file...") + for { + n, err := decoder.PCMBuffer(buf) + if err != nil || n == 0 || len(buf.Data) == 0 { + log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs)) + break + } + + pcm := make([]int16, n) + for i, v := range buf.Data[:n] { + pcm[i] = int16(v) + } + sampleCount += n + + for offset := 0; offset <= len(pcm)-winN; offset += hopN { + win := pcm[offset:min(offset+winN, len(pcm))] + t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs)) + event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t) + if 
event != "" { + fmt.Printf("Detected two-tone sequence:\n") + fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur) + fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur) + det.reset() + } + } } - } } +// min returns the smaller of two integer values a and b. func min(a, b int) int { - if a < b { - return a - } - return b + if a < b { + return a + } + return b }