Refactor main.go to integrate structured logging and enhance tone detection functionality

This commit is contained in:
2025-08-15 18:49:46 -04:00
parent b670654a4b
commit bd64dd24dc

151
main.go
View File

@ -1,10 +1,9 @@
// -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3
package main package main
import ( import (
"flag" "flag"
"fmt" "fmt"
"log" "log/slog"
"math" "math"
"os" "os"
"time" "time"
@ -23,6 +22,7 @@ var (
hopMs = flag.Int("hop", 50, "Hop size (ms)") hopMs = flag.Int("hop", 50, "Hop size (ms)")
ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection") ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal") rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
verbose = flag.Bool("verbose", false, "Enable debug logging")
) )
// Goertzel struct for frequency detection // Goertzel struct for frequency detection
@ -34,15 +34,6 @@ type goertzel struct {
} }
// newGoertzel initializes and returns a new instance of the Goertzel algorithm for detecting a specific target frequency. // newGoertzel initializes and returns a new instance of the Goertzel algorithm for detecting a specific target frequency.
// Parameters:
//
// targetHz - the target frequency in Hertz to detect.
// fs - the sampling rate in Hertz.
// N - the number of samples to process.
//
// Returns:
//
// A pointer to a goertzel struct configured for the specified frequency and sample rate.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel { func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
g := &goertzel{N: N, fs: fs} g := &goertzel{N: N, fs: fs}
g.k = int(0.5 + (float64(g.N)*targetHz)/fs) g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
@ -52,8 +43,6 @@ func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
} }
// Power computes the power of the target frequency in the input signal x using the Goertzel algorithm. // Power computes the power of the target frequency in the input signal x using the Goertzel algorithm.
// It processes the input slice x of length g.N and returns the squared magnitude of the frequency component
// specified by g.k. The function is typically used for efficient detection of specific frequencies in a signal.
func (g *goertzel) Power(x []float64) float64 { func (g *goertzel) Power(x []float64) float64 {
var s0, s1, s2 float64 var s0, s1, s2 float64
for i := 0; i < g.N; i++ { for i := 0; i < g.N; i++ {
@ -68,9 +57,6 @@ func (g *goertzel) Power(x []float64) float64 {
} }
// windowHann applies a Hann window to the input slice x in-place. // windowHann applies a Hann window to the input slice x in-place.
// The Hann window is commonly used in signal processing to reduce spectral leakage
// by tapering the beginning and end of the signal to zero.
// The function modifies the input slice directly.
func windowHann(x []float64) { func windowHann(x []float64) {
n := float64(len(x)) n := float64(len(x))
for i := range x { for i := range x {
@ -79,8 +65,6 @@ func windowHann(x []float64) {
} }
// pcmToFloat converts a slice of 16-bit PCM audio samples to a slice of float64 values. // pcmToFloat converts a slice of 16-bit PCM audio samples to a slice of float64 values.
// It processes up to N samples from the input buffer and returns the converted values.
// If the input buffer has fewer than N samples, only the available samples are converted.
func pcmToFloat(buf []int16, N int) []float64 { func pcmToFloat(buf []int16, N int) []float64 {
out := make([]float64, N) out := make([]float64, N)
for i := 0; i < N && i < len(buf); i++ { for i := 0; i < N && i < len(buf); i++ {
@ -90,8 +74,6 @@ func pcmToFloat(buf []int16, N int) []float64 {
} }
// rmsPCM calculates the root mean square (RMS) value of a slice of 16-bit PCM audio samples. // rmsPCM calculates the root mean square (RMS) value of a slice of 16-bit PCM audio samples.
// It returns the RMS as a float64, which is a measure of the signal's amplitude.
// If the input slice is empty, it returns 0.
func rmsPCM(buf []int16) float64 { func rmsPCM(buf []int16) float64 {
var s float64 var s float64
for _, v := range buf { for _, v := range buf {
@ -125,27 +107,11 @@ type twoToneDetector struct {
bAccumMs int bAccumMs int
bStart time.Time bStart time.Time
gapRemainMs int gapRemainMs int
logger *slog.Logger
} }
// newTwoToneDetector creates and initializes a twoToneDetector instance with the specified parameters. // newTwoToneDetector creates and initializes a twoToneDetector instance with the specified parameters.
// It sets up a bank of Goertzel filters for detecting tones in the frequency range 300-3000 Hz (in 10 Hz steps). func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int, logger *slog.Logger) *twoToneDetector {
//
// Parameters:
//
// fs - Sample rate in Hz.
// winN - Window size (number of samples per analysis window).
// hopN - Hop size (number of samples to advance per analysis).
// ratioThresh- Threshold for the ratio used in tone detection.
// rmsThresh - RMS threshold for signal energy.
// minAms - Minimum duration of a detected tone in milliseconds.
// minBms - Minimum accumulated duration of tone B, in milliseconds, required to report a detection.
// gapMaxMs - Maximum allowed gap between tones in milliseconds.
//
// Returns:
//
// Pointer to an initialized twoToneDetector.
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
// Frequency range: 300-3000 Hz, 10 Hz steps
freqs := make([]float64, 0) freqs := make([]float64, 0)
for f := 300.0; f <= 3000.0; f += 10.0 { for f := 300.0; f <= 3000.0; f += 10.0 {
freqs = append(freqs, f) freqs = append(freqs, f)
@ -165,29 +131,12 @@ func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minA
gapMaxMs: gapMaxMs, gapMaxMs: gapMaxMs,
freqs: freqs, freqs: freqs,
gzBank: gzBank, gzBank: gzBank,
logger: logger,
} }
} }
// stepWindow processes a window of PCM audio samples to detect a two-tone event. // stepWindow processes a window of PCM audio samples to detect a two-tone event.
// It applies a Hann window, computes the RMS, and searches for the strongest frequency. func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64, timestamp time.Time) {
// The function tracks the presence and duration of two distinct tones (A and B) separated by a gap.
// If both tones are detected with sufficient duration and within specified thresholds, it returns
// an event string ("TWO_TONE_DETECTED") along with the frequencies and durations of tones A and B.
// If detection criteria are not met, it resets the detector state and returns zero values.
//
// Parameters:
//
// pcms []int16 - Slice of PCM audio samples for the current window.
// t0 time.Time - Timestamp corresponding to the start of the window.
//
// Returns:
//
// event string - Event name if two-tone detected, otherwise empty string.
// aFreq float64 - Frequency of tone A (Hz).
// aDur float64 - Duration of tone A (milliseconds).
// bFreq float64 - Frequency of tone B (Hz).
// bDur float64 - Duration of tone B (milliseconds).
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
xi := pcmToFloat(pcms, d.winN) xi := pcmToFloat(pcms, d.winN)
windowHann(xi) windowHann(xi)
@ -197,9 +146,16 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string,
} }
r := rmsPCM(pcms) r := rmsPCM(pcms)
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
now := t0
if r < d.rmsThresh { if r < d.rmsThresh {
d.logger.Debug("RMS below threshold, resetting",
"time", now.Format(time.RFC3339),
"rms", fmt.Sprintf("%.2f", r),
"threshold", d.rmsThresh)
d.reset() d.reset()
return "", 0, 0, 0, 0 return "", 0, 0, 0, 0, time.Time{}
} }
// Find frequency with highest power // Find frequency with highest power
@ -214,14 +170,15 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string,
} }
ratio := bestPow / (total + 1e-12) ratio := bestPow / (total + 1e-12)
if ratio < d.ratioThresh { if ratio < d.ratioThresh {
d.logger.Debug("Ratio below threshold, resetting",
"time", now.Format(time.RFC3339),
"ratio", fmt.Sprintf("%.3f", ratio),
"threshold", d.ratioThresh)
d.reset() d.reset()
return "", 0, 0, 0, 0 return "", 0, 0, 0, 0, time.Time{}
} }
freq := d.freqs[bestIdx] freq := d.freqs[bestIdx]
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
now := t0
if !d.inA && !d.waitingB { if !d.inA && !d.waitingB {
// Looking for Tone A // Looking for Tone A
d.inA = true d.inA = true
@ -238,19 +195,29 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string,
d.gapRemainMs = d.gapMaxMs d.gapRemainMs = d.gapMaxMs
} }
} else { } else {
d.logger.Debug("Frequency differs from Tone A, resetting",
"time", now.Format(time.RFC3339),
"freq", fmt.Sprintf("%.1f", freq),
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq))
d.reset() d.reset()
} }
} else if d.waitingB { } else if d.waitingB {
d.gapRemainMs -= int(hopDur.Milliseconds()) d.gapRemainMs -= int(hopDur.Milliseconds())
if d.gapRemainMs <= 0 { if d.gapRemainMs <= 0 {
d.logger.Debug("Gap exceeded max duration, resetting",
"time", now.Format(time.RFC3339),
"gap_max_ms", d.gapMaxMs)
d.reset() d.reset()
} else if math.Abs(freq-d.aFreq) > 10.0 { } else if math.Abs(freq-d.aFreq) > 10.0 {
// Check for Tone B (different frequency) // Check for Tone B
if d.bAccumMs == 0 { if d.bAccumMs == 0 {
d.bFreq = freq d.bFreq = freq
d.bStart = now d.bStart = now
} else if math.Abs(freq-d.bFreq) > 10.0 { } else if math.Abs(freq-d.bFreq) > 10.0 {
// Switched to a different frequency, reset B d.logger.Debug("Frequency differs from Tone B, resetting B",
"time", now.Format(time.RFC3339),
"freq", fmt.Sprintf("%.1f", freq),
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq))
d.bFreq = freq d.bFreq = freq
d.bAccumMs = 0 d.bAccumMs = 0
d.bStart = now d.bStart = now
@ -258,15 +225,20 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string,
d.bAccumMs += int(hopDur.Milliseconds()) d.bAccumMs += int(hopDur.Milliseconds())
if d.bAccumMs >= d.minBms { if d.bAccumMs >= d.minBms {
event = "TWO_TONE_DETECTED" event = "TWO_TONE_DETECTED"
return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs) d.logger.Info("Two-tone detected",
"time", now.Format(time.RFC3339),
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq),
"tone_a_duration_ms", d.aAccumMs,
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq),
"tone_b_duration_ms", d.bAccumMs)
return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs), now
} }
} }
} }
return "", 0, 0, 0, 0 return "", 0, 0, 0, 0, time.Time{}
} }
// reset reinitializes all internal state fields of the twoToneDetector to their default values. // reset reinitializes all internal state fields of the twoToneDetector.
// This includes clearing detection flags, frequencies, accumulated durations, start times, and gap timers.
func (d *twoToneDetector) reset() { func (d *twoToneDetector) reset() {
d.inA = false d.inA = false
d.aFreq = 0 d.aFreq = 0
@ -281,33 +253,52 @@ func (d *twoToneDetector) reset() {
func main() { func main() {
flag.Parse() flag.Parse()
// Initialize slog logger
logLevel := &slog.LevelVar{}
logLevel.Set(slog.LevelInfo)
if *verbose {
logLevel.Set(slog.LevelDebug)
}
logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
Level: logLevel,
}))
if *wavFile == "" { if *wavFile == "" {
log.Fatal("WAV file path is required (use -wav flag)") logger.Error("WAV file path is required", "flag", "-wav")
os.Exit(1)
} }
file, err := os.Open(*wavFile) file, err := os.Open(*wavFile)
if err != nil { if err != nil {
log.Fatalf("Failed to open WAV file: %v", err) logger.Error("Failed to open WAV file", "error", err)
os.Exit(1)
} }
defer file.Close() defer file.Close()
decoder := wav.NewDecoder(file) decoder := wav.NewDecoder(file)
if !decoder.IsValidFile() { if !decoder.IsValidFile() {
log.Fatal("Invalid WAV file") logger.Error("Invalid WAV file")
os.Exit(1)
} }
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 { if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels", logger.Error("WAV file must be mono 8kHz",
decoder.Format().SampleRate, decoder.Format().NumChannels) "sample_rate", decoder.Format().SampleRate,
"channels", decoder.Format().NumChannels)
os.Exit(1)
} }
const fs = 8000 const fs = 8000
winN := int(float64(fs) * float64(*winMs) / 1000.0) winN := int(float64(fs) * float64(*winMs) / 1000.0)
hopN := int(float64(fs) * float64(*hopMs) / 1000.0) hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
if winN <= 0 || hopN <= 0 || hopN > winN { if winN <= 0 || hopN <= 0 || hopN > winN {
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN) logger.Error("Invalid window/hop parameters",
"winN", winN,
"hopN", hopN)
os.Exit(1)
} }
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs) det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs, logger)
buf := &audio.IntBuffer{ buf := &audio.IntBuffer{
Format: &audio.Format{SampleRate: fs, NumChannels: 1}, Format: &audio.Format{SampleRate: fs, NumChannels: 1},
@ -317,11 +308,13 @@ func main() {
sampleCount := 0 sampleCount := 0
startTime := time.Now() startTime := time.Now()
log.Println("Processing WAV file...") logger.Info("Processing WAV file")
for { for {
n, err := decoder.PCMBuffer(buf) n, err := decoder.PCMBuffer(buf)
if err != nil || n == 0 || len(buf.Data) == 0 { if err != nil || n == 0 || len(buf.Data) == 0 {
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs)) logger.Info("Finished processing",
"samples", sampleCount,
"duration_sec", fmt.Sprintf("%.2f", float64(sampleCount)/float64(fs)))
break break
} }
@ -334,9 +327,9 @@ func main() {
for offset := 0; offset <= len(pcm)-winN; offset += hopN { for offset := 0; offset <= len(pcm)-winN; offset += hopN {
win := pcm[offset:min(offset+winN, len(pcm))] win := pcm[offset:min(offset+winN, len(pcm))]
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs)) t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t) event, aFreq, aDur, bFreq, bDur, timestamp := det.stepWindow(win, t)
if event != "" { if event != "" {
fmt.Printf("Detected two-tone sequence:\n") fmt.Printf("Detected two-tone sequence at %s:\n", timestamp.Format(time.RFC3339))
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur) fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur) fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
det.reset() det.reset()