From bd64dd24dc87539c5e94a06f12831076188bef6b Mon Sep 17 00:00:00 2001 From: Alex Savin Date: Fri, 15 Aug 2025 18:49:46 -0400 Subject: [PATCH] Refactor main.go to integrate structured logging and enhance tone detection functionality --- main.go | 151 +++++++++++++++++++++++++++----------------------------- 1 file changed, 72 insertions(+), 79 deletions(-) diff --git a/main.go b/main.go index 57b3d78..46523d5 100644 --- a/main.go +++ b/main.go @@ -1,10 +1,9 @@ -// -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3 package main import ( "flag" "fmt" - "log" + "log/slog" "math" "os" "time" @@ -23,6 +22,7 @@ var ( hopMs = flag.Int("hop", 50, "Hop size (ms)") ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection") rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal") + verbose = flag.Bool("verbose", false, "Enable debug logging") ) // Goertzel struct for frequency detection @@ -34,15 +34,6 @@ type goertzel struct { } // newGoertzel initializes and returns a new instance of the Goertzel algorithm for detecting a specific target frequency. -// Parameters: -// -// targetHz - the target frequency in Hertz to detect. -// fs - the sampling rate in Hertz. -// N - the number of samples to process. -// -// Returns: -// -// A pointer to a goertzel struct configured for the specified frequency and sample rate. func newGoertzel(targetHz float64, fs float64, N int) *goertzel { g := &goertzel{N: N, fs: fs} g.k = int(0.5 + (float64(g.N)*targetHz)/fs) @@ -52,8 +43,6 @@ func newGoertzel(targetHz float64, fs float64, N int) *goertzel { } // Power computes the power of the target frequency in the input signal x using the Goertzel algorithm. -// It processes the input slice x of length g.N and returns the squared magnitude of the frequency component -// specified by g.k. The function is typically used for efficient detection of specific frequencies in a signal. func (g *goertzel) Power(x []float64) float64 { var s0, s1, s2 float64 for i := 0; i < g.N; i++ { @@ -68,9 +57,6 @@ func (g *goertzel) Power(x []float64) float64 { } // windowHann applies a Hann window to the input slice x in-place. -// The Hann window is commonly used in signal processing to reduce spectral leakage -// by tapering the beginning and end of the signal to zero. -// The function modifies the input slice directly. func windowHann(x []float64) { n := float64(len(x)) for i := range x { @@ -79,8 +65,6 @@ func windowHann(x []float64) { } // pcmToFloat converts a slice of 16-bit PCM audio samples to a slice of float64 values. -// It processes up to N samples from the input buffer and returns the converted values. -// If the input buffer has fewer than N samples, only the available samples are converted. func pcmToFloat(buf []int16, N int) []float64 { out := make([]float64, N) for i := 0; i < N && i < len(buf); i++ { @@ -90,8 +74,6 @@ func pcmToFloat(buf []int16, N int) []float64 { } // rmsPCM calculates the root mean square (RMS) value of a slice of 16-bit PCM audio samples. -// It returns the RMS as a float64, which is a measure of the signal's amplitude. -// If the input slice is empty, it returns 0. func rmsPCM(buf []int16) float64 { var s float64 for _, v := range buf { @@ -125,27 +107,11 @@ type twoToneDetector struct { bAccumMs int bStart time.Time gapRemainMs int + logger *slog.Logger } // newTwoToneDetector creates and initializes a twoToneDetector instance with the specified parameters. -// It sets up a bank of Goertzel filters for detecting tones in the frequency range 300–3000 Hz (in 10 Hz steps). -// -// Parameters: -// -// fs - Sample rate in Hz. -// winN - Window size (number of samples per analysis window). -// hopN - Hop size (number of samples to advance per analysis). -// ratioThresh- Threshold for the ratio used in tone detection. -// rmsThresh - RMS threshold for signal energy. -// minAms - Minimum duration of a detected tone in milliseconds. -// minBms - Minimum duration of a break between tones in milliseconds. -// gapMaxMs - Maximum allowed gap between tones in milliseconds. -// -// Returns: -// -// Pointer to an initialized twoToneDetector. -func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector { - // Frequency range: 300–3000 Hz, 10 Hz steps +func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int, logger *slog.Logger) *twoToneDetector { freqs := make([]float64, 0) for f := 300.0; f <= 3000.0; f += 10.0 { freqs = append(freqs, f) @@ -165,29 +131,12 @@ func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minA gapMaxMs: gapMaxMs, freqs: freqs, gzBank: gzBank, + logger: logger, } } // stepWindow processes a window of PCM audio samples to detect a two-tone event. -// It applies a Hann window, computes the RMS, and searches for the strongest frequency. -// The function tracks the presence and duration of two distinct tones (A and B) separated by a gap. -// If both tones are detected with sufficient duration and within specified thresholds, it returns -// an event string ("TWO_TONE_DETECTED") along with the frequencies and durations of tones A and B. -// If detection criteria are not met, it resets the detector state and returns zero values. -// -// Parameters: -// -// pcms []int16 - Slice of PCM audio samples for the current window. -// t0 time.Time - Timestamp corresponding to the start of the window. -// -// Returns: -// -// event string - Event name if two-tone detected, otherwise empty string. -// aFreq float64 - Frequency of tone A (Hz). -// aDur float64 - Duration of tone A (milliseconds). -// bFreq float64 - Frequency of tone B (Hz). -// bDur float64 - Duration of tone B (milliseconds). -func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) { +func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64, timestamp time.Time) { xi := pcmToFloat(pcms, d.winN) windowHann(xi) @@ -197,9 +146,16 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, } r := rmsPCM(pcms) + hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs))) + now := t0 + if r < d.rmsThresh { + d.logger.Debug("RMS below threshold, resetting", + "time", now.Format(time.RFC3339), + "rms", fmt.Sprintf("%.2f", r), + "threshold", d.rmsThresh) d.reset() - return "", 0, 0, 0, 0 + return "", 0, 0, 0, 0, time.Time{} } // Find frequency with highest power @@ -214,14 +170,15 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, } ratio := bestPow / (total + 1e-12) if ratio < d.ratioThresh { + d.logger.Debug("Ratio below threshold, resetting", + "time", now.Format(time.RFC3339), + "ratio", fmt.Sprintf("%.3f", ratio), + "threshold", d.ratioThresh) d.reset() - return "", 0, 0, 0, 0 + return "", 0, 0, 0, 0, time.Time{} } freq := d.freqs[bestIdx] - hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs))) - now := t0 - if !d.inA && !d.waitingB { // Looking for Tone A d.inA = true @@ -238,19 +195,29 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, d.gapRemainMs = d.gapMaxMs } } else { + d.logger.Debug("Frequency differs from Tone A, resetting", + "time", now.Format(time.RFC3339), + "freq", fmt.Sprintf("%.1f", freq), + "tone_a_freq", fmt.Sprintf("%.1f", d.aFreq)) d.reset() } } else if d.waitingB { d.gapRemainMs -= int(hopDur.Milliseconds()) if d.gapRemainMs <= 0 { + d.logger.Debug("Gap exceeded max duration, resetting", + "time", now.Format(time.RFC3339), + "gap_max_ms", d.gapMaxMs) d.reset() } else if math.Abs(freq-d.aFreq) > 10.0 { - // Check for Tone B (different frequency) + // Check for Tone B if d.bAccumMs == 0 { d.bFreq = freq d.bStart = now } else if math.Abs(freq-d.bFreq) > 10.0 { - // Switched to a different frequency, reset B + d.logger.Debug("Frequency differs from Tone B, resetting B", + "time", now.Format(time.RFC3339), + "freq", fmt.Sprintf("%.1f", freq), + "tone_b_freq", fmt.Sprintf("%.1f", d.bFreq)) d.bFreq = freq d.bAccumMs = 0 d.bStart = now @@ -258,15 +225,20 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, d.bAccumMs += int(hopDur.Milliseconds()) if d.bAccumMs >= d.minBms { event = "TWO_TONE_DETECTED" - return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs) + d.logger.Info("Two-tone detected", + "time", now.Format(time.RFC3339), + "tone_a_freq", fmt.Sprintf("%.1f", d.aFreq), + "tone_a_duration_ms", d.aAccumMs, + "tone_b_freq", fmt.Sprintf("%.1f", d.bFreq), + "tone_b_duration_ms", d.bAccumMs) + return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs), now } } } - return "", 0, 0, 0, 0 + return "", 0, 0, 0, 0, time.Time{} } -// reset reinitializes all internal state fields of the twoToneDetector to their default values. -// This includes clearing detection flags, frequencies, accumulated durations, start times, and gap timers. +// reset reinitializes all internal state fields of the twoToneDetector. func (d *twoToneDetector) reset() { d.inA = false d.aFreq = 0 @@ -281,33 +253,52 @@ func (d *twoToneDetector) reset() { func main() { flag.Parse() + + // Initialize slog logger + logLevel := &slog.LevelVar{} + logLevel.Set(slog.LevelInfo) + if *verbose { + logLevel.Set(slog.LevelDebug) + } + logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{ + Level: logLevel, + })) + if *wavFile == "" { - log.Fatal("WAV file path is required (use -wav flag)") + logger.Error("WAV file path is required", "flag", "-wav") + os.Exit(1) } file, err := os.Open(*wavFile) if err != nil { - log.Fatalf("Failed to open WAV file: %v", err) + logger.Error("Failed to open WAV file", "error", err) + os.Exit(1) } defer file.Close() decoder := wav.NewDecoder(file) if !decoder.IsValidFile() { - log.Fatal("Invalid WAV file") + logger.Error("Invalid WAV file") + os.Exit(1) } if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 { - log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels", - decoder.Format().SampleRate, decoder.Format().NumChannels) + logger.Error("WAV file must be mono 8kHz", + "sample_rate", decoder.Format().SampleRate, + "channels", decoder.Format().NumChannels) + os.Exit(1) } const fs = 8000 winN := int(float64(fs) * float64(*winMs) / 1000.0) hopN := int(float64(fs) * float64(*hopMs) / 1000.0) if winN <= 0 || hopN <= 0 || hopN > winN { - log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN) + logger.Error("Invalid window/hop parameters", + "winN", winN, + "hopN", hopN) + os.Exit(1) } - det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs) + det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs, logger) buf := &audio.IntBuffer{ Format: &audio.Format{SampleRate: fs, NumChannels: 1}, @@ -317,11 +308,13 @@ func main() { sampleCount := 0 startTime := time.Now() - log.Println("Processing WAV file...") + logger.Info("Processing WAV file") for { n, err := decoder.PCMBuffer(buf) if err != nil || n == 0 || len(buf.Data) == 0 { - log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs)) + logger.Info("Finished processing", + "samples", sampleCount, + "duration_sec", fmt.Sprintf("%.2f", float64(sampleCount)/float64(fs))) break } @@ -334,9 +327,9 @@ func main() { for offset := 0; offset <= len(pcm)-winN; offset += hopN { win := pcm[offset:min(offset+winN, len(pcm))] t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs)) - event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t) + event, aFreq, aDur, bFreq, bDur, timestamp := det.stepWindow(win, t) if event != "" { - fmt.Printf("Detected two-tone sequence:\n") + fmt.Printf("Detected two-tone sequence at %s:\n", timestamp.Format(time.RFC3339)) fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur) fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur) det.reset()