Refactor main.go for improved readability and add detailed comments for functions

2025-08-15 18:44:10 -04:00
parent aec3310ebf
commit b670654a4b
1 changed files with 298 additions and 236 deletions
--- a/main.go
+++ b/main.go
@@ -2,291 +2,353 @@
 package main
 import (
-    "flag"
+	"flag"
-    "fmt"
+	"fmt"
-    "log"
+	"log"
-    "math"
+	"math"
-    "os"
+	"os"
-    "time"
+	"time"
-    "github.com/go-audio/audio"
+	"github.com/go-audio/audio"
-    "github.com/go-audio/wav"
+	"github.com/go-audio/wav"
 )
 // Command-line flags
 var (
-    wavFile     = flag.String("wav", "", "Path to mono 8kHz WAV file")
+	wavFile     = flag.String("wav", "", "Path to mono 8kHz WAV file")
-    minAms      = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
+	minAms      = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
-    minBms      = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
+	minBms      = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
-    gapMaxMs    = flag.Int("gap", 5000, "Max gap between A and B (ms)")
+	gapMaxMs    = flag.Int("gap", 5000, "Max gap between A and B (ms)")
-    winMs       = flag.Int("win", 100, "Window size (ms)")
+	winMs       = flag.Int("win", 100, "Window size (ms)")
-    hopMs       = flag.Int("hop", 50, "Hop size (ms)")
+	hopMs       = flag.Int("hop", 50, "Hop size (ms)")
-    ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
+	ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
-    rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
+	rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
 )
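 // goertzel holds the precomputed parameters of a single-bin Goertzel filter:
 // block length N, sample rate fs, DFT bin index k, and the recurrence
 // coefficient coeff = 2*cos(2*pi*k/N).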
 type goertzel struct {
-    N     int
+	N     int
-    fs    float64
+	fs    float64
-    k     int
+	k     int
-    coeff float64
+	coeff float64
 }
 // newGoertzel initializes and returns a new instance of the Goertzel algorithm for detecting a specific target frequency.
 // Parameters:
 //
 //	targetHz - the target frequency in Hertz to detect.
 //	fs       - the sampling rate in Hertz.
 //	N        - the number of samples to process.
 //
 // Returns:
 //
 //	A pointer to a goertzel struct configured for the specified frequency and sample rate.
 func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
-    g := &goertzel{N: N, fs: fs}
+	g := &goertzel{N: N, fs: fs}
-    g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
+	g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
-    omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
+	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
-    g.coeff = 2.0 * math.Cos(omega)
+	g.coeff = 2.0 * math.Cos(omega)
-    return g
+	return g
 }
 // Power computes the power of the target frequency in the input signal x using the Goertzel algorithm.
 // It reads the first g.N samples of x (x must hold at least g.N samples, otherwise indexing panics)
 // and returns the squared magnitude of the frequency component at bin g.k. The result is not
 // normalized by the block length. The function is typically used for efficient detection of
 // specific frequencies in a signal.
 func (g *goertzel) Power(x []float64) float64 {
-    var s0, s1, s2 float64
+	var s0, s1, s2 float64
-    for i := 0; i < g.N; i++ {
+	for i := 0; i < g.N; i++ {
-	s0 = x[i] + g.coeff*s1 - s2
+		s0 = x[i] + g.coeff*s1 - s2
-	s2 = s1
+		s2 = s1
-	s1 = s0
+		s1 = s0
-    }
+	}
-    omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
+	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
-    real := s1 - s2*math.Cos(omega)
+	real := s1 - s2*math.Cos(omega)
-    imag := s2 * math.Sin(omega)
+	imag := s2 * math.Sin(omega)
-    return real*real + imag*imag
+	return real*real + imag*imag
 }
 // windowHann applies a Hann window to the input slice x in-place.
 // The Hann window is commonly used in signal processing to reduce spectral leakage
 // by tapering the beginning and end of the signal to zero.
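 // The function modifies the input slice directly.
 // Note: for a one-element slice the divisor n-1 is zero, so the single sample becomes NaN;
 // an empty slice is left untouched.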
 func windowHann(x []float64) {
-    n := float64(len(x))
+	n := float64(len(x))
-    for i := range x {
+	for i := range x {
-	x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0)))
+		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0)))
-    }
+	}
 }
 // pcmToFloat converts a slice of 16-bit PCM audio samples to a slice of float64 values.
 // The returned slice always has length N. The first min(N, len(buf)) samples are converted;
 // if the input buffer has fewer than N samples, the remaining entries are left at zero.
 func pcmToFloat(buf []int16, N int) []float64 {
-    out := make([]float64, N)
+	out := make([]float64, N)
-    for i := 0; i < N && i < len(buf); i++ {
+	for i := 0; i < N && i < len(buf); i++ {
-	out[i] = float64(buf[i])
+		out[i] = float64(buf[i])
-    }
+	}
-    return out
+	return out
 }
 // rmsPCM calculates the root mean square (RMS) value of a slice of 16-bit PCM audio samples.
 // It returns the RMS as a float64, which is a measure of the signal's amplitude.
 // If the input slice is empty, it returns 0.
 func rmsPCM(buf []int16) float64 {
-    var s float64
+	var s float64
-    for _, v := range buf {
+	for _, v := range buf {
-	f := float64(v)
+		f := float64(v)
-	s += f * f
+		s += f * f
-    }
+	}
-    if len(buf) == 0 {
+	if len(buf) == 0 {
-	return 0
+		return 0
-    }
+	}
-    return math.Sqrt(s / float64(len(buf)))
+	return math.Sqrt(s / float64(len(buf)))
 }
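 // twoToneDetector holds the configuration, the Goertzel filter bank, and the running state
 // used to detect a Tone A followed by a Tone B at a different frequency.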
 type twoToneDetector struct {
-    fs          int
+	fs          int
-    winN        int
+	winN        int
-    hopN        int
+	hopN        int
-    ratioThresh float64
+	ratioThresh float64
-    rmsThresh   float64
+	rmsThresh   float64
-    minAms      int
+	minAms      int
-    minBms      int
+	minBms      int
-    gapMaxMs    int
+	gapMaxMs    int
-    freqs       []float64
+	freqs       []float64
-    gzBank      []*goertzel
+	gzBank      []*goertzel
-    inA         bool
+	inA         bool
-    aFreq       float64
+	aFreq       float64
-    aAccumMs    int
+	aAccumMs    int
-    aStart      time.Time
+	aStart      time.Time
-    waitingB    bool
+	waitingB    bool
-    bFreq       float64
+	bFreq       float64
-    bAccumMs    int
+	bAccumMs    int
-    bStart      time.Time
+	bStart      time.Time
-    gapRemainMs int
+	gapRemainMs int
 }
 // newTwoToneDetector creates and initializes a twoToneDetector instance with the specified parameters.
 // It sets up a bank of Goertzel filters for detecting tones in the frequency range 300–3000 Hz (in 10 Hz steps).
 //
 // Parameters:
 //
 //	fs         - Sample rate in Hz.
 //	winN       - Window size (number of samples per analysis window).
 //	hopN       - Hop size (number of samples to advance per analysis).
 //	ratioThresh- Threshold for the ratio used in tone detection.
 //	rmsThresh  - RMS threshold for signal energy.
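 //	minAms     - Minimum accumulated duration of Tone A in milliseconds before it is accepted.
 //	minBms     - Minimum accumulated duration of Tone B in milliseconds before an event is reported.
 //	gapMaxMs   - Time budget in milliseconds, started once Tone A is accepted, within which Tone B must reach minBms.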
 //
 // Returns:
 //
 //	Pointer to an initialized twoToneDetector.
 func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
-    // Frequency range: 300–3000 Hz, 10 Hz steps
+	// Frequency range: 300–3000 Hz, 10 Hz steps
-    freqs := make([]float64, 0)
+	freqs := make([]float64, 0)
-    for f := 300.0; f <= 3000.0; f += 10.0 {
+	for f := 300.0; f <= 3000.0; f += 10.0 {
-	freqs = append(freqs, f)
+		freqs = append(freqs, f)
-    }
+	}
-    gzBank := make([]*goertzel, len(freqs))
+	gzBank := make([]*goertzel, len(freqs))
-    for i, f := range freqs {
+	for i, f := range freqs {
-	gzBank[i] = newGoertzel(f, float64(fs), winN)
+		gzBank[i] = newGoertzel(f, float64(fs), winN)
-    }
+	}
-    return &twoToneDetector{
+	return &twoToneDetector{
-	fs:          fs,
+		fs:          fs,
-	winN:        winN,
+		winN:        winN,
-	hopN:        hopN,
+		hopN:        hopN,
-	ratioThresh: ratioThresh,
+		ratioThresh: ratioThresh,
-	rmsThresh:   rmsThresh,
+		rmsThresh:   rmsThresh,
-	minAms:      minAms,
+		minAms:      minAms,
-	minBms:      minBms,
+		minBms:      minBms,
-	gapMaxMs:    gapMaxMs,
+		gapMaxMs:    gapMaxMs,
-	freqs:       freqs,
+		freqs:       freqs,
-	gzBank:      gzBank,
+		gzBank:      gzBank,
-    }
+	}
 }
 // stepWindow processes a window of PCM audio samples to detect a two-tone event.
 // It applies a Hann window, computes the RMS, and searches for the strongest frequency.
 // The function tracks the presence and duration of two distinct tones (A and B) separated by a gap.
 // If both tones are detected with sufficient duration and within specified thresholds, it returns
 // an event string ("TWO_TONE_DETECTED") along with the frequencies and durations of tones A and B.
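 // Otherwise it returns an empty event and zero values. The detector state is reset when the
 // window's RMS is below rmsThresh, when the strongest bin's power ratio is below ratioThresh,
 // when the frequency moves more than 10 Hz away from Tone A before minAms is reached, or when
 // the gap budget runs out while waiting for Tone B. stepWindow does not reset the state after
 // a successful detection.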
 //
 // Parameters:
 //
 //	pcms []int16   - Slice of PCM audio samples for the current window.
 //	t0 time.Time   - Timestamp corresponding to the start of the window.
 //
 // Returns:
 //
 //	event string   - Event name if two-tone detected, otherwise empty string.
 //	aFreq float64  - Frequency of tone A (Hz).
 //	aDur float64   - Duration of tone A (milliseconds).
 //	bFreq float64  - Frequency of tone B (Hz).
 //	bDur float64   - Duration of tone B (milliseconds).
 func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
-    xi := pcmToFloat(pcms, d.winN)
+	xi := pcmToFloat(pcms, d.winN)
-    windowHann(xi)
+	windowHann(xi)
-    var total float64
+	var total float64
-    for _, v := range xi {
+	for _, v := range xi {
-	total += v * v
+		total += v * v
-    }
+	}
-    r := rmsPCM(pcms)
+	r := rmsPCM(pcms)
-    if r < d.rmsThresh {
+	if r < d.rmsThresh {
-	d.reset()
+		d.reset()
 		return "", 0, 0, 0, 0
 	}
 	// Find frequency with highest power
 	bestIdx := -1
 	bestPow := 0.0
 	for i, gz := range d.gzBank {
 		p := gz.Power(xi)
 		if p > bestPow {
 			bestPow = p
 			bestIdx = i
 		}
 	}
 	ratio := bestPow / (total + 1e-12)
 	if ratio < d.ratioThresh {
 		d.reset()
 		return "", 0, 0, 0, 0
 	}
 	freq := d.freqs[bestIdx]
 	hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
 	now := t0
 	if !d.inA && !d.waitingB {
 		// Looking for Tone A
 		d.inA = true
 		d.aFreq = freq
 		d.aAccumMs = int(hopDur.Milliseconds())
 		d.aStart = now
 	} else if d.inA && !d.waitingB {
 		// Confirming Tone A
 		if math.Abs(freq-d.aFreq) <= 10.0 {
 			d.aAccumMs += int(hopDur.Milliseconds())
 			if d.aAccumMs >= d.minAms {
 				d.inA = false
 				d.waitingB = true
 				d.gapRemainMs = d.gapMaxMs
 			}
 		} else {
 			d.reset()
 		}
 	} else if d.waitingB {
 		d.gapRemainMs -= int(hopDur.Milliseconds())
 		if d.gapRemainMs <= 0 {
 			d.reset()
 		} else if math.Abs(freq-d.aFreq) > 10.0 {
 			// Check for Tone B (different frequency)
 			if d.bAccumMs == 0 {
 				d.bFreq = freq
 				d.bStart = now
 			} else if math.Abs(freq-d.bFreq) > 10.0 {
 				// Switched to a different frequency, reset B
 				d.bFreq = freq
 				d.bAccumMs = 0
 				d.bStart = now
 			}
 			d.bAccumMs += int(hopDur.Milliseconds())
 			if d.bAccumMs >= d.minBms {
 				event = "TWO_TONE_DETECTED"
 				return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
 			}
 		}
 	}
 	return "", 0, 0, 0, 0
    }
    // Find frequency with highest power
    bestIdx := -1
    bestPow := 0.0
    for i, gz := range d.gzBank {
 	p := gz.Power(xi)
 	if p > bestPow {
 	    bestPow = p
 	    bestIdx = i
 	}
    }
    ratio := bestPow / (total + 1e-12)
    if ratio < d.ratioThresh {
 	d.reset()
 	return "", 0, 0, 0, 0
    }
    freq := d.freqs[bestIdx]
    hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
    now := t0
    if !d.inA && !d.waitingB {
 	// Looking for Tone A
 	d.inA = true
 	d.aFreq = freq
 	d.aAccumMs = int(hopDur.Milliseconds())
 	d.aStart = now
    } else if d.inA && !d.waitingB {
 	// Confirming Tone A
 	if math.Abs(freq-d.aFreq) <= 10.0 {
 	    d.aAccumMs += int(hopDur.Milliseconds())
 	    if d.aAccumMs >= d.minAms {
 		d.inA = false
 		d.waitingB = true
 		d.gapRemainMs = d.gapMaxMs
 	    }
 	} else {
 	    d.reset()
 	}
    } else if d.waitingB {
 	d.gapRemainMs -= int(hopDur.Milliseconds())
 	if d.gapRemainMs <= 0 {
 	    d.reset()
 	} else if math.Abs(freq-d.aFreq) > 10.0 {
 	    // Check for Tone B (different frequency)
 	    if d.bAccumMs == 0 {
 		d.bFreq = freq
 		d.bStart = now
 	    } else if math.Abs(freq-d.bFreq) > 10.0 {
 		// Switched to a different frequency, reset B
 		d.bFreq = freq
 		d.bAccumMs = 0
 		d.bStart = now
 	    }
 	    d.bAccumMs += int(hopDur.Milliseconds())
 	    if d.bAccumMs >= d.minBms {
 		event = "TWO_TONE_DETECTED"
 		return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
 	    }
 	}
    }
    return "", 0, 0, 0, 0
 }
 // reset reinitializes all internal state fields of the twoToneDetector to their default values.
 // This includes clearing detection flags, frequencies, accumulated durations, start times, and gap timers.
 func (d *twoToneDetector) reset() {
-    d.inA = false
+	d.inA = false
-    d.aFreq = 0
+	d.aFreq = 0
-    d.aAccumMs = 0
+	d.aAccumMs = 0
-    d.aStart = time.Time{}
+	d.aStart = time.Time{}
-    d.waitingB = false
+	d.waitingB = false
-    d.bFreq = 0
+	d.bFreq = 0
-    d.bAccumMs = 0
+	d.bAccumMs = 0
-    d.bStart = time.Time{}
+	d.bStart = time.Time{}
-    d.gapRemainMs = 0
+	d.gapRemainMs = 0
 }
 func main() {
-    flag.Parse()
+	flag.Parse()
-    if *wavFile == "" {
+	if *wavFile == "" {
-	log.Fatal("WAV file path is required (use -wav flag)")
+		log.Fatal("WAV file path is required (use -wav flag)")
    }
    file, err := os.Open(*wavFile)
    if err != nil {
 	log.Fatalf("Failed to open WAV file: %v", err)
    }
    defer file.Close()
    decoder := wav.NewDecoder(file)
    if !decoder.IsValidFile() {
 	log.Fatal("Invalid WAV file")
    }
    if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
 	log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
 	    decoder.Format().SampleRate, decoder.Format().NumChannels)
    }
    const fs = 8000
    winN := int(float64(fs) * float64(*winMs) / 1000.0)
    hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
    if winN <= 0 || hopN <= 0 || hopN > winN {
 	log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
    }
    det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
    buf := &audio.IntBuffer{
 	Format:         &audio.Format{SampleRate: fs, NumChannels: 1},
 	Data:           make([]int, 8192),
 	SourceBitDepth: 16,
    }
    sampleCount := 0
    startTime := time.Now()
    log.Println("Processing WAV file...")
    for {
 	n, err := decoder.PCMBuffer(buf)
 	if err != nil || n == 0 || len(buf.Data) == 0 {
 	    log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
 	    break
 	}
-	pcm := make([]int16, n)
+	file, err := os.Open(*wavFile)
-	for i, v := range buf.Data[:n] {
+	if err != nil {
-	    pcm[i] = int16(v)
+		log.Fatalf("Failed to open WAV file: %v", err)
 	}
-	sampleCount += n
+	defer file.Close()
-	for offset := 0; offset <= len(pcm)-winN; offset += hopN {
+	decoder := wav.NewDecoder(file)
-	    win := pcm[offset:min(offset+winN, len(pcm))]
+	if !decoder.IsValidFile() {
-	    t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
+		log.Fatal("Invalid WAV file")
-	    event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
+	}
-	    if event != "" {
+	if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
-		fmt.Printf("Detected two-tone sequence:\n")
+		log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
-		fmt.Printf("  Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
+			decoder.Format().SampleRate, decoder.Format().NumChannels)
-		fmt.Printf("  Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
+	}
-		det.reset()
+
-	    }
+	const fs = 8000
 	winN := int(float64(fs) * float64(*winMs) / 1000.0)
 	hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
 	if winN <= 0 || hopN <= 0 || hopN > winN {
 		log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
 	}
 	det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
 	buf := &audio.IntBuffer{
 		Format:         &audio.Format{SampleRate: fs, NumChannels: 1},
 		Data:           make([]int, 8192),
 		SourceBitDepth: 16,
 	}
 	sampleCount := 0
 	startTime := time.Now()
 	log.Println("Processing WAV file...")
 	for {
 		n, err := decoder.PCMBuffer(buf)
 		if err != nil || n == 0 || len(buf.Data) == 0 {
 			log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
 			break
 		}
 		pcm := make([]int16, n)
 		for i, v := range buf.Data[:n] {
 			pcm[i] = int16(v)
 		}
 		sampleCount += n
 		for offset := 0; offset <= len(pcm)-winN; offset += hopN {
 			win := pcm[offset:min(offset+winN, len(pcm))]
 			t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
 			event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
 			if event != "" {
 				fmt.Printf("Detected two-tone sequence:\n")
 				fmt.Printf("  Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
 				fmt.Printf("  Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
 				det.reset()
 			}
 		}
 	}
    }
 }
 // min returns the smaller of two integer values a and b.
 func min(a, b int) int {
-    if a < b {
+	if a < b {
-	return a
+		return a
-    }
+	}
-    return b
+	return b
 }