Refactor main.go for improved readability and add detailed comments for functions

This commit is contained in:
2025-08-15 18:44:10 -04:00
parent aec3310ebf
commit b670654a4b

534
main.go
View File

@ -2,291 +2,353 @@
package main
import (
"flag"
"fmt"
"log"
"math"
"os"
"time"
"flag"
"fmt"
"log"
"math"
"os"
"time"
"github.com/go-audio/audio"
"github.com/go-audio/wav"
"github.com/go-audio/audio"
"github.com/go-audio/wav"
)
// Command-line flags. Each variable is a pointer that holds the flag's
// default until flag.Parse runs. All durations are in milliseconds.
var (
	wavFile     = flag.String("wav", "", "Path to mono 8kHz WAV file")
	minAms      = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
	minBms      = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
	gapMaxMs    = flag.Int("gap", 5000, "Max gap between A and B (ms)")
	winMs       = flag.Int("win", 100, "Window size (ms)")
	hopMs       = flag.Int("hop", 50, "Hop size (ms)")
	ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
	rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
)
// goertzel holds the precomputed parameters of a single-bin Goertzel filter.
type goertzel struct {
	N     int     // number of samples per analysis block
	fs    float64 // sampling rate in Hz
	k     int     // DFT bin index nearest to the target frequency
	coeff float64 // recurrence coefficient, 2*cos(2*pi*k/N)
}

// newGoertzel returns a Goertzel filter tuned to the DFT bin closest to
// targetHz.
//
// Parameters:
//
//	targetHz - the frequency to detect, in Hz.
//	fs       - the sampling rate, in Hz.
//	N        - the number of samples per analysis block.
//
// The bin index is rounded to the nearest integer, so the frequency actually
// measured is k*fs/N, which may differ from targetHz. N and fs are not
// validated; zero values produce a NaN or undefined coefficient.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
	g := &goertzel{N: N, fs: fs}
	// Adding 0.5 before truncation rounds a non-negative bin index to nearest.
	g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
	g.coeff = 2.0 * math.Cos(g.omega())
	return g
}

// omega returns the normalized angular frequency of bin k, in radians per
// sample.
func (g *goertzel) omega() float64 {
	return (2.0 * math.Pi * float64(g.k)) / float64(g.N)
}

// Power returns the squared magnitude of the frequency component at bin g.k
// over the first g.N samples of x.
//
// If x holds fewer than g.N samples, only the available samples are processed
// instead of indexing past the end of the slice. Trailing zero samples only
// rotate the filter state, so the result equals that of a zero-padded block.
func (g *goertzel) Power(x []float64) float64 {
	n := g.N
	if len(x) < n {
		n = len(x)
	}

	// Second-order recurrence: s1 and s2 are the two previous filter states.
	var s1, s2 float64
	for i := 0; i < n; i++ {
		s0 := x[i] + g.coeff*s1 - s2
		s2 = s1
		s1 = s0
	}

	// Combine the last two states into the complex bin value.
	omega := g.omega()
	real := s1 - s2*math.Cos(omega)
	imag := s2 * math.Sin(omega)
	return real*real + imag*imag
}
// windowHann multiplies x in place by a symmetric Hann window, which tapers
// both ends of the block to zero to reduce spectral leakage.
//
// Slices with fewer than two samples are left unchanged: the window formula
// divides by len(x)-1, which would turn a single sample into NaN.
func windowHann(x []float64) {
	if len(x) < 2 {
		return
	}
	denom := float64(len(x)) - 1.0
	for i := range x {
		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/denom))
	}
}
// pcmToFloat converts 16-bit PCM samples to float64 without rescaling.
//
// The result always has exactly N elements. If buf holds fewer than N samples
// the remainder stays zero; if it holds more, the extra samples are ignored.
// A negative N is treated as zero instead of panicking inside make.
func pcmToFloat(buf []int16, N int) []float64 {
	if N < 0 {
		N = 0
	}
	out := make([]float64, N)

	limit := len(buf)
	if N < limit {
		limit = N
	}
	for i, v := range buf[:limit] {
		out[i] = float64(v)
	}
	return out
}
// rmsPCM returns the root mean square of the 16-bit PCM samples in buf, in
// raw sample units. It returns 0 for an empty slice.
func rmsPCM(buf []int16) float64 {
	if len(buf) == 0 {
		return 0
	}
	var sumSq float64
	for _, v := range buf {
		f := float64(v)
		sumSq += f * f
	}
	return math.Sqrt(sumSq / float64(len(buf)))
}
// twoToneDetector tracks a sequence of two tones: a tone A held for at least
// minAms, followed by a tone B at a different frequency held for at least
// minBms. The first group of fields is configuration set by
// newTwoToneDetector; the second group is per-sequence state updated by
// stepWindow and cleared by reset.
type twoToneDetector struct {
	fs          int         // sample rate in Hz
	winN        int         // analysis window length in samples
	hopN        int         // hop between successive windows in samples
	ratioThresh float64     // minimum strongest-bin power / total power ratio
	rmsThresh   float64     // minimum window RMS, in raw PCM units
	minAms      int         // minimum accumulated duration of tone A (ms)
	minBms      int         // minimum accumulated duration of tone B (ms)
	gapMaxMs    int         // time budget after tone A is confirmed (ms)
	freqs       []float64   // candidate frequencies in Hz, parallel to gzBank
	gzBank      []*goertzel // one Goertzel filter per entry in freqs

	inA         bool      // true while tone A is being accumulated
	aFreq       float64   // frequency of the current tone A candidate (Hz)
	aAccumMs    int       // accumulated duration of tone A (ms)
	aStart      time.Time // timestamp of the window where tone A started
	waitingB    bool      // true once tone A reached minAms
	bFreq       float64   // frequency of the current tone B candidate (Hz)
	bAccumMs    int       // accumulated duration of tone B (ms)
	bStart      time.Time // timestamp of the window where tone B started
	gapRemainMs int       // remaining time budget while waitingB (ms)
}
// newTwoToneDetector returns a twoToneDetector configured with the given
// parameters and a bank of Goertzel filters covering 300–3000 Hz in 10 Hz
// steps (271 candidate frequencies).
//
// Parameters:
//
//	fs          - sample rate in Hz.
//	winN        - window size (number of samples per analysis window).
//	hopN        - hop size (number of samples to advance per analysis).
//	ratioThresh - minimum ratio of strongest-bin power to total window power.
//	rmsThresh   - minimum window RMS for the window to be considered.
//	minAms      - minimum duration of tone A in milliseconds.
//	minBms      - minimum duration of tone B in milliseconds.
//	gapMaxMs    - time budget after tone A is confirmed, in milliseconds.
//
// Returns:
//
//	Pointer to an initialized twoToneDetector with cleared detection state.
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
	// Frequency range: 300–3000 Hz, 10 Hz steps. An integer counter keeps the
	// grid exact and lets both slices be sized up front.
	const (
		loHz   = 300
		hiHz   = 3000
		stepHz = 10
	)
	count := (hiHz-loHz)/stepHz + 1

	freqs := make([]float64, 0, count)
	gzBank := make([]*goertzel, 0, count)
	for hz := loHz; hz <= hiHz; hz += stepHz {
		f := float64(hz)
		freqs = append(freqs, f)
		gzBank = append(gzBank, newGoertzel(f, float64(fs), winN))
	}

	return &twoToneDetector{
		fs:          fs,
		winN:        winN,
		hopN:        hopN,
		ratioThresh: ratioThresh,
		rmsThresh:   rmsThresh,
		minAms:      minAms,
		minBms:      minBms,
		gapMaxMs:    gapMaxMs,
		freqs:       freqs,
		gzBank:      gzBank,
	}
}
// stepWindow feeds one analysis window into the detector and advances the
// tone A / tone B state machine by one hop.
//
// A window is rejected, and the detector reset, when its RMS is below
// rmsThresh, when no filter in the bank reports positive power, or when the
// strongest bin's share of the Hann-windowed energy is below ratioThresh.
// Otherwise the strongest frequency drives the state machine:
//
//   - idle: the frequency becomes the tone A candidate;
//   - in tone A: a frequency within 10 Hz of the candidate adds one hop to its
//     duration and, once minAms is reached, the detector starts waiting for
//     tone B; any other frequency resets the detector;
//   - waiting for tone B: each window consumes one hop of the gapMaxMs budget;
//     a frequency more than 10 Hz away from tone A accumulates as tone B, and
//     a change of more than 10 Hz restarts the tone B accumulation.
//
// Parameters:
//
//	pcms - PCM samples of the current window (zero-padded to winN if short).
//	t0   - timestamp corresponding to the start of the window.
//
// Returns:
//
//	event - "TWO_TONE_DETECTED" when tone B reaches minBms, otherwise "".
//	aFreq - frequency of tone A (Hz), zero unless an event is returned.
//	aDur  - accumulated duration of tone A (ms), zero unless an event is returned.
//	bFreq - frequency of tone B (Hz), zero unless an event is returned.
//	bDur  - accumulated duration of tone B (ms), zero unless an event is returned.
//
// The detector is not reset after a detection; the caller decides when to
// call reset.
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
	// Gate on the raw signal level before doing any spectral work.
	if rmsPCM(pcms) < d.rmsThresh {
		d.reset()
		return "", 0, 0, 0, 0
	}

	xi := pcmToFloat(pcms, d.winN)
	windowHann(xi)

	var total float64
	for _, v := range xi {
		total += v * v
	}

	// Find frequency with highest power.
	bestIdx := -1
	bestPow := 0.0
	for i, gz := range d.gzBank {
		if p := gz.Power(xi); p > bestPow {
			bestPow = p
			bestIdx = i
		}
	}

	// bestIdx stays -1 when no bin has positive power; without this guard a
	// non-positive ratioThresh would index freqs with -1.
	//
	// NOTE(review): Power returns an unnormalized squared magnitude while
	// total is a plain sum of squares, so this ratio is not confined to
	// [0, 1] — confirm the intended normalization of ratioThresh.
	if bestIdx < 0 || bestPow/(total+1e-12) < d.ratioThresh {
		d.reset()
		return "", 0, 0, 0, 0
	}

	freq := d.freqs[bestIdx]
	// Each accepted window accounts for one hop of elapsed time, in whole ms.
	hopMs := int(float64(d.hopN) * 1000.0 / float64(d.fs))

	switch {
	case d.waitingB:
		// NOTE(review): the budget keeps counting down while tone B
		// accumulates, so tone B must complete within gapMaxMs of tone A
		// being confirmed — confirm this matches the intended "max gap".
		d.gapRemainMs -= hopMs
		if d.gapRemainMs <= 0 {
			d.reset()
			break
		}
		if math.Abs(freq-d.aFreq) <= 10.0 {
			// Still hearing tone A; keep waiting.
			break
		}
		// Check for Tone B (different frequency).
		if d.bAccumMs == 0 {
			d.bFreq = freq
			d.bStart = t0
		} else if math.Abs(freq-d.bFreq) > 10.0 {
			// Switched to a different frequency, restart B.
			d.bFreq = freq
			d.bAccumMs = 0
			d.bStart = t0
		}
		d.bAccumMs += hopMs
		if d.bAccumMs >= d.minBms {
			return "TWO_TONE_DETECTED", d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
		}

	case d.inA:
		// Confirming Tone A.
		if math.Abs(freq-d.aFreq) > 10.0 {
			d.reset()
			break
		}
		d.aAccumMs += hopMs
		if d.aAccumMs >= d.minAms {
			d.inA = false
			d.waitingB = true
			d.gapRemainMs = d.gapMaxMs
		}

	default:
		// Looking for Tone A.
		d.inA = true
		d.aFreq = freq
		d.aAccumMs = hopMs
		d.aStart = t0
	}

	return "", 0, 0, 0, 0
}
// reset clears the per-sequence detection state (tone A and tone B flags,
// frequencies, accumulated durations, start times and the gap budget).
// Configuration fields and the filter bank are left untouched.
func (d *twoToneDetector) reset() {
	// Tone A tracking.
	d.inA = false
	d.aFreq = 0
	d.aAccumMs = 0
	d.aStart = time.Time{}

	// Tone B tracking.
	d.waitingB = false
	d.bFreq = 0
	d.bAccumMs = 0
	d.bStart = time.Time{}
	d.gapRemainMs = 0
}
func main() {
flag.Parse()
if *wavFile == "" {
log.Fatal("WAV file path is required (use -wav flag)")
}
file, err := os.Open(*wavFile)
if err != nil {
log.Fatalf("Failed to open WAV file: %v", err)
}
defer file.Close()
decoder := wav.NewDecoder(file)
if !decoder.IsValidFile() {
log.Fatal("Invalid WAV file")
}
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
decoder.Format().SampleRate, decoder.Format().NumChannels)
}
const fs = 8000
winN := int(float64(fs) * float64(*winMs) / 1000.0)
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
if winN <= 0 || hopN <= 0 || hopN > winN {
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
}
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
buf := &audio.IntBuffer{
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
Data: make([]int, 8192),
SourceBitDepth: 16,
}
sampleCount := 0
startTime := time.Now()
log.Println("Processing WAV file...")
for {
n, err := decoder.PCMBuffer(buf)
if err != nil || n == 0 || len(buf.Data) == 0 {
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
break
flag.Parse()
if *wavFile == "" {
log.Fatal("WAV file path is required (use -wav flag)")
}
pcm := make([]int16, n)
for i, v := range buf.Data[:n] {
pcm[i] = int16(v)
file, err := os.Open(*wavFile)
if err != nil {
log.Fatalf("Failed to open WAV file: %v", err)
}
sampleCount += n
defer file.Close()
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
win := pcm[offset:min(offset+winN, len(pcm))]
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
if event != "" {
fmt.Printf("Detected two-tone sequence:\n")
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
det.reset()
}
decoder := wav.NewDecoder(file)
if !decoder.IsValidFile() {
log.Fatal("Invalid WAV file")
}
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
decoder.Format().SampleRate, decoder.Format().NumChannels)
}
const fs = 8000
winN := int(float64(fs) * float64(*winMs) / 1000.0)
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
if winN <= 0 || hopN <= 0 || hopN > winN {
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
}
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
buf := &audio.IntBuffer{
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
Data: make([]int, 8192),
SourceBitDepth: 16,
}
sampleCount := 0
startTime := time.Now()
log.Println("Processing WAV file...")
for {
n, err := decoder.PCMBuffer(buf)
if err != nil || n == 0 || len(buf.Data) == 0 {
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
break
}
pcm := make([]int16, n)
for i, v := range buf.Data[:n] {
pcm[i] = int16(v)
}
sampleCount += n
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
win := pcm[offset:min(offset+winN, len(pcm))]
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
if event != "" {
fmt.Printf("Detected two-tone sequence:\n")
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
det.reset()
}
}
}
}
}
// min returns the smaller of two integer values a and b.
// When they are equal, that common value is returned.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}