Refactor main.go for improved readability and add detailed comments for functions

2025-08-15 18:44:10 -04:00
parent aec3310ebf
commit b670654a4b
1 changed files with 298 additions and 236 deletions
--- a/main.go
+++ b/main.go
@@ -2,291 +2,353 @@
 package main

 import (
-    "flag"
-    "fmt"
-    "log"
-    "math"
-    "os"
-    "time"
+	"flag"
+	"fmt"
+	"log"
+	"math"
+	"os"
+	"time"

-    "github.com/go-audio/audio"
-    "github.com/go-audio/wav"
+	"github.com/go-audio/audio"
+	"github.com/go-audio/wav"
 )

 // Command-line flags
 var (
-    wavFile     = flag.String("wav", "", "Path to mono 8kHz WAV file")
-    minAms      = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
-    minBms      = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
-    gapMaxMs    = flag.Int("gap", 5000, "Max gap between A and B (ms)")
-    winMs       = flag.Int("win", 100, "Window size (ms)")
-    hopMs       = flag.Int("hop", 50, "Hop size (ms)")
-    ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
-    rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
+	wavFile     = flag.String("wav", "", "Path to mono 8kHz WAV file")
+	minAms      = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
+	minBms      = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
+	gapMaxMs    = flag.Int("gap", 5000, "Max gap between A and B (ms)")
+	winMs       = flag.Int("win", 100, "Window size (ms)")
+	hopMs       = flag.Int("hop", 50, "Hop size (ms)")
+	ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
+	rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
 )

 // Goertzel struct for frequency detection
 type goertzel struct {
-    N     int
-    fs    float64
-    k     int
-    coeff float64
+	N     int
+	fs    float64
+	k     int
+	coeff float64
 }

+// newGoertzel initializes and returns a new instance of the Goertzel algorithm for detecting a specific target frequency.
+// Parameters:
+//
+//	targetHz - the target frequency in Hertz to detect.
+//	fs       - the sampling rate in Hertz.
+//	N        - the number of samples to process.
+//
+// Returns:
+//
+//	A pointer to a goertzel struct configured for the specified frequency and sample rate.
 func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
-    g := &goertzel{N: N, fs: fs}
-    g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
-    omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
-    g.coeff = 2.0 * math.Cos(omega)
-    return g
+	g := &goertzel{N: N, fs: fs}
+	g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
+	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
+	g.coeff = 2.0 * math.Cos(omega)
+	return g
 }

+// Power computes the power of the target frequency in the input signal x using the Goertzel algorithm.
+// It processes the input slice x of length g.N and returns the squared magnitude of the frequency component
+// specified by g.k. The function is typically used for efficient detection of specific frequencies in a signal.
 func (g *goertzel) Power(x []float64) float64 {
-    var s0, s1, s2 float64
-    for i := 0; i < g.N; i++ {
-	s0 = x[i] + g.coeff*s1 - s2
-	s2 = s1
-	s1 = s0
-    }
-    omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
-    real := s1 - s2*math.Cos(omega)
-    imag := s2 * math.Sin(omega)
-    return real*real + imag*imag
+	var s0, s1, s2 float64
+	for i := 0; i < g.N; i++ {
+		s0 = x[i] + g.coeff*s1 - s2
+		s2 = s1
+		s1 = s0
+	}
+	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
+	real := s1 - s2*math.Cos(omega)
+	imag := s2 * math.Sin(omega)
+	return real*real + imag*imag
 }

+// windowHann applies a Hann window to the input slice x in-place.
+// The Hann window is commonly used in signal processing to reduce spectral leakage
+// by tapering the beginning and end of the signal to zero.
+// The function modifies the input slice directly.
 func windowHann(x []float64) {
-    n := float64(len(x))
-    for i := range x {
-	x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0)))
-    }
+	n := float64(len(x))
+	for i := range x {
+		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0)))
+	}
 }

+// pcmToFloat converts a slice of 16-bit PCM audio samples to a slice of float64 values.
+// It converts at most N samples from the input buffer and always returns a slice of length N.
+// If the input buffer has fewer than N samples, the remaining output elements are left as zero.
 func pcmToFloat(buf []int16, N int) []float64 {
-    out := make([]float64, N)
-    for i := 0; i < N && i < len(buf); i++ {
-	out[i] = float64(buf[i])
-    }
-    return out
+	out := make([]float64, N)
+	for i := 0; i < N && i < len(buf); i++ {
+		out[i] = float64(buf[i])
+	}
+	return out
 }

+// rmsPCM calculates the root mean square (RMS) value of a slice of 16-bit PCM audio samples.
+// It returns the RMS as a float64, which is a measure of the signal's amplitude.
+// If the input slice is empty, it returns 0.
 func rmsPCM(buf []int16) float64 {
-    var s float64
-    for _, v := range buf {
-	f := float64(v)
-	s += f * f
-    }
-    if len(buf) == 0 {
-	return 0
-    }
-    return math.Sqrt(s / float64(len(buf)))
+	var s float64
+	for _, v := range buf {
+		f := float64(v)
+		s += f * f
+	}
+	if len(buf) == 0 {
+		return 0
+	}
+	return math.Sqrt(s / float64(len(buf)))
 }

 // twoToneDetector for detecting tone sequences
 type twoToneDetector struct {
-    fs          int
-    winN        int
-    hopN        int
-    ratioThresh float64
-    rmsThresh   float64
-    minAms      int
-    minBms      int
-    gapMaxMs    int
-    freqs       []float64
-    gzBank      []*goertzel
-    inA         bool
-    aFreq       float64
-    aAccumMs    int
-    aStart      time.Time
-    waitingB    bool
-    bFreq       float64
-    bAccumMs    int
-    bStart      time.Time
-    gapRemainMs int
+	fs          int
+	winN        int
+	hopN        int
+	ratioThresh float64
+	rmsThresh   float64
+	minAms      int
+	minBms      int
+	gapMaxMs    int
+	freqs       []float64
+	gzBank      []*goertzel
+	inA         bool
+	aFreq       float64
+	aAccumMs    int
+	aStart      time.Time
+	waitingB    bool
+	bFreq       float64
+	bAccumMs    int
+	bStart      time.Time
+	gapRemainMs int
 }

+// newTwoToneDetector creates and initializes a twoToneDetector instance with the specified parameters.
+// It sets up a bank of Goertzel filters for detecting tones in the frequency range 300–3000 Hz (in 10 Hz steps).
+//
+// Parameters:
+//
+//	fs         - Sample rate in Hz.
+//	winN       - Window size (number of samples per analysis window).
+//	hopN       - Hop size (number of samples to advance per analysis).
+//	ratioThresh - Minimum ratio of the strongest filter's power to the total window energy.
+//	rmsThresh  - RMS threshold for signal energy.
+//	minAms     - Minimum accumulated duration of Tone A in milliseconds.
+//	minBms     - Minimum accumulated duration of Tone B in milliseconds.
+//	gapMaxMs   - Maximum allowed gap between tones in milliseconds.
+//
+// Returns:
+//
+//	Pointer to an initialized twoToneDetector.
 func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
-    // Frequency range: 300–3000 Hz, 10 Hz steps
-    freqs := make([]float64, 0)
-    for f := 300.0; f <= 3000.0; f += 10.0 {
-	freqs = append(freqs, f)
-    }
-    gzBank := make([]*goertzel, len(freqs))
-    for i, f := range freqs {
-	gzBank[i] = newGoertzel(f, float64(fs), winN)
-    }
-    return &twoToneDetector{
-	fs:          fs,
-	winN:        winN,
-	hopN:        hopN,
-	ratioThresh: ratioThresh,
-	rmsThresh:   rmsThresh,
-	minAms:      minAms,
-	minBms:      minBms,
-	gapMaxMs:    gapMaxMs,
-	freqs:       freqs,
-	gzBank:      gzBank,
-    }
+	// Frequency range: 300–3000 Hz, 10 Hz steps
+	freqs := make([]float64, 0)
+	for f := 300.0; f <= 3000.0; f += 10.0 {
+		freqs = append(freqs, f)
+	}
+	gzBank := make([]*goertzel, len(freqs))
+	for i, f := range freqs {
+		gzBank[i] = newGoertzel(f, float64(fs), winN)
+	}
+	return &twoToneDetector{
+		fs:          fs,
+		winN:        winN,
+		hopN:        hopN,
+		ratioThresh: ratioThresh,
+		rmsThresh:   rmsThresh,
+		minAms:      minAms,
+		minBms:      minBms,
+		gapMaxMs:    gapMaxMs,
+		freqs:       freqs,
+		gzBank:      gzBank,
+	}
 }

+// stepWindow processes a window of PCM audio samples to detect a two-tone event.
+// It applies a Hann window, computes the RMS, and searches for the strongest frequency.
+// The function tracks the presence and duration of two distinct tones (A and B) separated by a gap.
+// If both tones are detected with sufficient duration and within specified thresholds, it returns
+// an event string ("TWO_TONE_DETECTED") along with the frequencies and durations of tones A and B.
+// The detector resets when a window fails the RMS or ratio threshold, Tone A changes frequency, or the gap budget runs out; calls that do not complete a detection return zero values.
+//
+// Parameters:
+//
+//	pcms []int16   - Slice of PCM audio samples for the current window.
+//	t0 time.Time   - Timestamp corresponding to the start of the window.
+//
+// Returns:
+//
+//	event string   - Event name if two-tone detected, otherwise empty string.
+//	aFreq float64  - Frequency of tone A (Hz).
+//	aDur float64   - Duration of tone A (milliseconds).
+//	bFreq float64  - Frequency of tone B (Hz).
+//	bDur float64   - Duration of tone B (milliseconds).
 func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
-    xi := pcmToFloat(pcms, d.winN)
-    windowHann(xi)
+	xi := pcmToFloat(pcms, d.winN)
+	windowHann(xi)

-    var total float64
-    for _, v := range xi {
-	total += v * v
-    }
+	var total float64
+	for _, v := range xi {
+		total += v * v
+	}

-    r := rmsPCM(pcms)
-    if r < d.rmsThresh {
-	d.reset()
+	r := rmsPCM(pcms)
+	if r < d.rmsThresh {
+		d.reset()
+		return "", 0, 0, 0, 0
+	}
+
+	// Find frequency with highest power
+	bestIdx := -1
+	bestPow := 0.0
+	for i, gz := range d.gzBank {
+		p := gz.Power(xi)
+		if p > bestPow {
+			bestPow = p
+			bestIdx = i
+		}
+	}
+	ratio := bestPow / (total + 1e-12)
+	if ratio < d.ratioThresh {
+		d.reset()
+		return "", 0, 0, 0, 0
+	}
+	freq := d.freqs[bestIdx]
+
+	hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
+	now := t0
+
+	if !d.inA && !d.waitingB {
+		// Looking for Tone A
+		d.inA = true
+		d.aFreq = freq
+		d.aAccumMs = int(hopDur.Milliseconds())
+		d.aStart = now
+	} else if d.inA && !d.waitingB {
+		// Confirming Tone A
+		if math.Abs(freq-d.aFreq) <= 10.0 {
+			d.aAccumMs += int(hopDur.Milliseconds())
+			if d.aAccumMs >= d.minAms {
+				d.inA = false
+				d.waitingB = true
+				d.gapRemainMs = d.gapMaxMs
+			}
+		} else {
+			d.reset()
+		}
+	} else if d.waitingB {
+		d.gapRemainMs -= int(hopDur.Milliseconds())
+		if d.gapRemainMs <= 0 {
+			d.reset()
+		} else if math.Abs(freq-d.aFreq) > 10.0 {
+			// Check for Tone B (different frequency)
+			if d.bAccumMs == 0 {
+				d.bFreq = freq
+				d.bStart = now
+			} else if math.Abs(freq-d.bFreq) > 10.0 {
+				// Switched to a different frequency, reset B
+				d.bFreq = freq
+				d.bAccumMs = 0
+				d.bStart = now
+			}
+			d.bAccumMs += int(hopDur.Milliseconds())
+			if d.bAccumMs >= d.minBms {
+				event = "TWO_TONE_DETECTED"
+				return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
+			}
+		}
+	}
 	return "", 0, 0, 0, 0
-    }
-
-    // Find frequency with highest power
-    bestIdx := -1
-    bestPow := 0.0
-    for i, gz := range d.gzBank {
-	p := gz.Power(xi)
-	if p > bestPow {
-	    bestPow = p
-	    bestIdx = i
-	}
-    }
-    ratio := bestPow / (total + 1e-12)
-    if ratio < d.ratioThresh {
-	d.reset()
-	return "", 0, 0, 0, 0
-    }
-    freq := d.freqs[bestIdx]
-
-    hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
-    now := t0
-
-    if !d.inA && !d.waitingB {
-	// Looking for Tone A
-	d.inA = true
-	d.aFreq = freq
-	d.aAccumMs = int(hopDur.Milliseconds())
-	d.aStart = now
-    } else if d.inA && !d.waitingB {
-	// Confirming Tone A
-	if math.Abs(freq-d.aFreq) <= 10.0 {
-	    d.aAccumMs += int(hopDur.Milliseconds())
-	    if d.aAccumMs >= d.minAms {
-		d.inA = false
-		d.waitingB = true
-		d.gapRemainMs = d.gapMaxMs
-	    }
-	} else {
-	    d.reset()
-	}
-    } else if d.waitingB {
-	d.gapRemainMs -= int(hopDur.Milliseconds())
-	if d.gapRemainMs <= 0 {
-	    d.reset()
-	} else if math.Abs(freq-d.aFreq) > 10.0 {
-	    // Check for Tone B (different frequency)
-	    if d.bAccumMs == 0 {
-		d.bFreq = freq
-		d.bStart = now
-	    } else if math.Abs(freq-d.bFreq) > 10.0 {
-		// Switched to a different frequency, reset B
-		d.bFreq = freq
-		d.bAccumMs = 0
-		d.bStart = now
-	    }
-	    d.bAccumMs += int(hopDur.Milliseconds())
-	    if d.bAccumMs >= d.minBms {
-		event = "TWO_TONE_DETECTED"
-		return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
-	    }
-	}
-    }
-    return "", 0, 0, 0, 0
 }

+// reset reinitializes all internal state fields of the twoToneDetector to their default values.
+// This includes clearing detection flags, frequencies, accumulated durations, start times, and gap timers.
 func (d *twoToneDetector) reset() {
-    d.inA = false
-    d.aFreq = 0
-    d.aAccumMs = 0
-    d.aStart = time.Time{}
-    d.waitingB = false
-    d.bFreq = 0
-    d.bAccumMs = 0
-    d.bStart = time.Time{}
-    d.gapRemainMs = 0
+	d.inA = false
+	d.aFreq = 0
+	d.aAccumMs = 0
+	d.aStart = time.Time{}
+	d.waitingB = false
+	d.bFreq = 0
+	d.bAccumMs = 0
+	d.bStart = time.Time{}
+	d.gapRemainMs = 0
 }

 func main() {
-    flag.Parse()
-    if *wavFile == "" {
-	log.Fatal("WAV file path is required (use -wav flag)")
-    }
-
-    file, err := os.Open(*wavFile)
-    if err != nil {
-	log.Fatalf("Failed to open WAV file: %v", err)
-    }
-    defer file.Close()
-
-    decoder := wav.NewDecoder(file)
-    if !decoder.IsValidFile() {
-	log.Fatal("Invalid WAV file")
-    }
-    if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
-	log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
-	    decoder.Format().SampleRate, decoder.Format().NumChannels)
-    }
-
-    const fs = 8000
-    winN := int(float64(fs) * float64(*winMs) / 1000.0)
-    hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
-    if winN <= 0 || hopN <= 0 || hopN > winN {
-	log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
-    }
-
-    det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
-
-    buf := &audio.IntBuffer{
-	Format:         &audio.Format{SampleRate: fs, NumChannels: 1},
-	Data:           make([]int, 8192),
-	SourceBitDepth: 16,
-    }
-    sampleCount := 0
-    startTime := time.Now()
-
-    log.Println("Processing WAV file...")
-    for {
-	n, err := decoder.PCMBuffer(buf)
-	if err != nil || n == 0 || len(buf.Data) == 0 {
-	    log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
-	    break
+	flag.Parse()
+	if *wavFile == "" {
+		log.Fatal("WAV file path is required (use -wav flag)")
 	}

-	pcm := make([]int16, n)
-	for i, v := range buf.Data[:n] {
-	    pcm[i] = int16(v)
+	file, err := os.Open(*wavFile)
+	if err != nil {
+		log.Fatalf("Failed to open WAV file: %v", err)
 	}
-	sampleCount += n
+	defer file.Close()

-	for offset := 0; offset <= len(pcm)-winN; offset += hopN {
-	    win := pcm[offset:min(offset+winN, len(pcm))]
-	    t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
-	    event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
-	    if event != "" {
-		fmt.Printf("Detected two-tone sequence:\n")
-		fmt.Printf("  Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
-		fmt.Printf("  Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
-		det.reset()
-	    }
+	decoder := wav.NewDecoder(file)
+	if !decoder.IsValidFile() {
+		log.Fatal("Invalid WAV file")
+	}
+	if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
+		log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
+			decoder.Format().SampleRate, decoder.Format().NumChannels)
+	}
+
+	const fs = 8000
+	winN := int(float64(fs) * float64(*winMs) / 1000.0)
+	hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
+	if winN <= 0 || hopN <= 0 || hopN > winN {
+		log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
+	}
+
+	det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
+
+	buf := &audio.IntBuffer{
+		Format:         &audio.Format{SampleRate: fs, NumChannels: 1},
+		Data:           make([]int, 8192),
+		SourceBitDepth: 16,
+	}
+	sampleCount := 0
+	startTime := time.Now()
+
+	log.Println("Processing WAV file...")
+	for {
+		n, err := decoder.PCMBuffer(buf)
+		if err != nil || n == 0 || len(buf.Data) == 0 {
+			log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
+			break
+		}
+
+		pcm := make([]int16, n)
+		for i, v := range buf.Data[:n] {
+			pcm[i] = int16(v)
+		}
+		sampleCount += n
+
+		for offset := 0; offset <= len(pcm)-winN; offset += hopN {
+			win := pcm[offset:min(offset+winN, len(pcm))]
+			t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
+			event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
+			if event != "" {
+				fmt.Printf("Detected two-tone sequence:\n")
+				fmt.Printf("  Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
+				fmt.Printf("  Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
+				det.reset()
+			}
+		}
 	}
-    }
 }

+// min returns the smaller of two integer values a and b.
 func min(a, b int) int {
-    if a < b {
-	return a
-    }
-    return b
+	if a < b {
+		return a
+	}
+	return b
 }