Compare commits

5 Commits

166
main.go
View File

@ -1,10 +1,9 @@
// -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3
package main package main
import ( import (
"flag" "flag"
"fmt" "fmt"
"log" "log/slog"
"math" "math"
"os" "os"
"time" "time"
@ -23,6 +22,7 @@ var (
hopMs = flag.Int("hop", 50, "Hop size (ms)") hopMs = flag.Int("hop", 50, "Hop size (ms)")
ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection") ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal") rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
verbose = flag.Bool("verbose", false, "Enable debug logging")
) )
// Goertzel struct for frequency detection // Goertzel struct for frequency detection
@ -33,6 +33,11 @@ type goertzel struct {
coeff float64 coeff float64
} }
// newGoertzel initializes and returns a new Goertzel filter configured to detect a specific target frequency.
// targetHz specifies the frequency to detect in Hertz.
// fs is the sampling rate in Hertz.
// N is the number of samples to process.
// The function calculates the filter coefficients based on the provided parameters.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel { func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
g := &goertzel{N: N, fs: fs} g := &goertzel{N: N, fs: fs}
g.k = int(0.5 + (float64(g.N)*targetHz)/fs) g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
@ -41,6 +46,10 @@ func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
return g return g
} }
// Power computes the power of the target frequency in the input signal x using the Goertzel algorithm.
// It processes the input slice x of length g.N, applying the Goertzel recurrence to accumulate state.
// The function returns the squared magnitude (power) of the frequency bin specified by g.k.
// x should be a slice of float64 samples, typically representing a windowed segment of a signal.
func (g *goertzel) Power(x []float64) float64 { func (g *goertzel) Power(x []float64) float64 {
var s0, s1, s2 float64 var s0, s1, s2 float64
for i := 0; i < g.N; i++ { for i := 0; i < g.N; i++ {
@ -54,6 +63,10 @@ func (g *goertzel) Power(x []float64) float64 {
return real*real + imag*imag return real*real + imag*imag
} }
// windowHann applies a Hann window to the input slice x in place.
// The Hann window is commonly used in signal processing to reduce spectral leakage
// by tapering the beginning and end of the signal to zero.
// The function modifies the input slice directly.
func windowHann(x []float64) { func windowHann(x []float64) {
n := float64(len(x)) n := float64(len(x))
for i := range x { for i := range x {
@ -61,6 +74,9 @@ func windowHann(x []float64) {
} }
} }
// pcmToFloat converts a slice of 16-bit PCM audio samples (buf) to a slice of float64 values.
// The output slice has length N, and each element is the float64 representation of the corresponding PCM sample.
// If N is greater than the length of buf, the output slice will contain zero values for the remaining elements.
func pcmToFloat(buf []int16, N int) []float64 { func pcmToFloat(buf []int16, N int) []float64 {
out := make([]float64, N) out := make([]float64, N)
for i := 0; i < N && i < len(buf); i++ { for i := 0; i < N && i < len(buf); i++ {
@ -69,6 +85,9 @@ func pcmToFloat(buf []int16, N int) []float64 {
return out return out
} }
// rmsPCM calculates the root mean square (RMS) value of a slice of 16-bit PCM audio samples.
// It returns the RMS as a float64, which represents the signal's effective amplitude.
// If the input slice is empty, it returns 0.
func rmsPCM(buf []int16) float64 { func rmsPCM(buf []int16) float64 {
var s float64 var s float64
for _, v := range buf { for _, v := range buf {
@ -97,15 +116,34 @@ type twoToneDetector struct {
aFreq float64 aFreq float64
aAccumMs int aAccumMs int
aStart time.Time aStart time.Time
aEnd time.Time
waitingB bool waitingB bool
bFreq float64 bFreq float64
bAccumMs int bAccumMs int
bStart time.Time bStart time.Time
bEnd time.Time
gapRemainMs int gapRemainMs int
logger *slog.Logger
} }
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector { // newTwoToneDetector creates and initializes a new twoToneDetector instance.
// Frequency range: 300-3000 Hz, 10 Hz steps // It sets up a bank of Goertzel filters for frequencies ranging from 300 Hz to 3000 Hz in 10 Hz steps.
// Parameters:
//
// fs - Sample rate in Hz.
// winN - Window size for analysis.
// hopN - Hop size between windows.
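// ratioThresh - Minimum ratio of the strongest bin's power to the total power; windows below it reset the detector.
// rmsThresh - Minimum RMS a window must reach to be treated as a valid signal; windows below it reset the detector.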
// minAms - Minimum duration of tone A in milliseconds.
// minBms - Minimum duration of tone B in milliseconds.
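// gapMaxMs - Time budget in milliseconds, started once tone A is confirmed, within which tone B must reach minBms.
// NOTE(review): the budget is decremented on every window after tone A, including windows where tone B is accumulating - confirm this is intended.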
// logger - Logger for diagnostic output.
//
// Returns:
//
// Pointer to a twoToneDetector configured with the specified parameters.
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int, logger *slog.Logger) *twoToneDetector {
freqs := make([]float64, 0) freqs := make([]float64, 0)
for f := 300.0; f <= 3000.0; f += 10.0 { for f := 300.0; f <= 3000.0; f += 10.0 {
freqs = append(freqs, f) freqs = append(freqs, f)
@ -125,10 +163,31 @@ func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minA
gapMaxMs: gapMaxMs, gapMaxMs: gapMaxMs,
freqs: freqs, freqs: freqs,
gzBank: gzBank, gzBank: gzBank,
logger: logger,
} }
} }
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) { // stepWindow processes a window of PCM audio samples to detect a two-tone sequence.
// It applies a Hann window to the samples, computes RMS and power ratios, and tracks
// the presence and duration of two distinct tones (A and B) according to configured thresholds.
// The function returns an event string (e.g., "TWO_TONE_DETECTED") when a valid two-tone
// sequence is detected, along with the frequencies and durations (in milliseconds) of both tones,
// and the timestamp of detection.
//
// Parameters:
//
// pcms - Slice of int16 PCM audio samples for the current window.
// t0 - Start time of the current window.
//
// Returns:
//
// event - Event string indicating detection status (e.g., "TWO_TONE_DETECTED" or "").
// aFreq - Frequency of detected Tone A (Hz).
// aDur - Duration of Tone A (milliseconds).
// bFreq - Frequency of detected Tone B (Hz).
// bDur - Duration of Tone B (milliseconds).
// timestamp - Timestamp of detection (time.Time). Zero value if no event detected.
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64, timestamp time.Time) {
xi := pcmToFloat(pcms, d.winN) xi := pcmToFloat(pcms, d.winN)
windowHann(xi) windowHann(xi)
@ -138,9 +197,16 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string,
} }
r := rmsPCM(pcms) r := rmsPCM(pcms)
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
now := t0
if r < d.rmsThresh { if r < d.rmsThresh {
d.logger.Debug("RMS below threshold, resetting",
"time", now.Format(time.RFC3339),
"rms", fmt.Sprintf("%.2f", r),
"threshold", d.rmsThresh)
d.reset() d.reset()
return "", 0, 0, 0, 0 return "", 0, 0, 0, 0, time.Time{}
} }
// Find frequency with highest power // Find frequency with highest power
@ -155,98 +221,143 @@ func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string,
} }
ratio := bestPow / (total + 1e-12) ratio := bestPow / (total + 1e-12)
if ratio < d.ratioThresh { if ratio < d.ratioThresh {
d.logger.Debug("Ratio below threshold, resetting",
"time", now.Format(time.RFC3339),
"ratio", fmt.Sprintf("%.3f", ratio),
"threshold", d.ratioThresh)
d.reset() d.reset()
return "", 0, 0, 0, 0 return "", 0, 0, 0, 0, time.Time{}
} }
freq := d.freqs[bestIdx] freq := d.freqs[bestIdx]
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
now := t0
if !d.inA && !d.waitingB { if !d.inA && !d.waitingB {
// Looking for Tone A // Looking for Tone A
d.inA = true d.inA = true
d.aFreq = freq d.aFreq = freq
d.aAccumMs = int(hopDur.Milliseconds())
d.aStart = now d.aStart = now
} else if d.inA && !d.waitingB { } else if d.inA && !d.waitingB {
// Confirming Tone A // Confirming Tone A
if math.Abs(freq-d.aFreq) <= 10.0 { if math.Abs(freq-d.aFreq) <= 10.0 {
d.aAccumMs += int(hopDur.Milliseconds()) d.aAccumMs += int(hopDur.Milliseconds())
d.aEnd = now.Add(hopDur)
if d.aAccumMs >= d.minAms { if d.aAccumMs >= d.minAms {
d.inA = false d.inA = false
d.waitingB = true d.waitingB = true
d.gapRemainMs = d.gapMaxMs d.gapRemainMs = d.gapMaxMs
} }
} else { } else {
d.logger.Debug("Frequency differs from Tone A, resetting",
"time", now.Format(time.RFC3339),
"freq", fmt.Sprintf("%.1f", freq),
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq))
d.reset() d.reset()
} }
} else if d.waitingB { } else if d.waitingB {
d.gapRemainMs -= int(hopDur.Milliseconds()) d.gapRemainMs -= int(hopDur.Milliseconds())
if d.gapRemainMs <= 0 { if d.gapRemainMs <= 0 {
d.logger.Debug("Gap exceeded max duration, resetting",
"time", now.Format(time.RFC3339),
"gap_max_ms", d.gapMaxMs)
d.reset() d.reset()
} else if math.Abs(freq-d.aFreq) > 10.0 { } else if math.Abs(freq-d.aFreq) > 10.0 {
// Check for Tone B (different frequency) // Check for Tone B
if d.bAccumMs == 0 { if d.bAccumMs == 0 {
d.bFreq = freq d.bFreq = freq
d.bStart = now d.bStart = now
} else if math.Abs(freq-d.bFreq) > 10.0 { } else if math.Abs(freq-d.bFreq) > 10.0 {
// Switched to a different frequency, reset B d.logger.Debug("Frequency differs from Tone B, resetting B",
"time", now.Format(time.RFC3339),
"freq", fmt.Sprintf("%.1f", freq),
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq))
d.bFreq = freq d.bFreq = freq
d.bAccumMs = 0 d.bAccumMs = 0
d.bStart = now d.bStart = now
} }
d.bAccumMs += int(hopDur.Milliseconds()) d.bAccumMs += int(hopDur.Milliseconds())
d.bEnd = now.Add(hopDur)
if d.bAccumMs >= d.minBms { if d.bAccumMs >= d.minBms {
event = "TWO_TONE_DETECTED" event = "TWO_TONE_DETECTED"
return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs) aDurMs := float64(d.aEnd.Sub(d.aStart).Milliseconds())
bDurMs := float64(d.bEnd.Sub(d.bStart).Milliseconds())
d.logger.Info("Two-tone detected",
"time", now.Format(time.RFC3339),
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq),
"tone_a_duration_ms", fmt.Sprintf("%.0f", aDurMs),
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq),
"tone_b_duration_ms", fmt.Sprintf("%.0f", bDurMs))
return event, d.aFreq, aDurMs, d.bFreq, bDurMs, now
} }
} }
} }
return "", 0, 0, 0, 0 return "", 0, 0, 0, 0, time.Time{}
} }
// reset reinitializes all internal state fields of the twoToneDetector,
// clearing any ongoing detection data and preparing the detector for a new
// detection sequence. This includes resetting flags, frequency values,
// accumulated durations, start/end timestamps, and gap tracking.
func (d *twoToneDetector) reset() { func (d *twoToneDetector) reset() {
d.inA = false d.inA = false
d.aFreq = 0 d.aFreq = 0
d.aAccumMs = 0 d.aAccumMs = 0
d.aStart = time.Time{} d.aStart = time.Time{}
d.aEnd = time.Time{}
d.waitingB = false d.waitingB = false
d.bFreq = 0 d.bFreq = 0
d.bAccumMs = 0 d.bAccumMs = 0
d.bStart = time.Time{} d.bStart = time.Time{}
d.bEnd = time.Time{}
d.gapRemainMs = 0 d.gapRemainMs = 0
} }
func main() { func main() {
flag.Parse() flag.Parse()
// Initialize slog logger
logLevel := &slog.LevelVar{}
logLevel.Set(slog.LevelInfo)
if *verbose {
logLevel.Set(slog.LevelDebug)
}
logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
Level: logLevel,
}))
if *wavFile == "" { if *wavFile == "" {
log.Fatal("WAV file path is required (use -wav flag)") logger.Error("WAV file path is required", "flag", "-wav")
os.Exit(1)
} }
file, err := os.Open(*wavFile) file, err := os.Open(*wavFile)
if err != nil { if err != nil {
log.Fatalf("Failed to open WAV file: %v", err) logger.Error("Failed to open WAV file", "error", err)
os.Exit(1)
} }
defer file.Close() defer file.Close()
decoder := wav.NewDecoder(file) decoder := wav.NewDecoder(file)
if !decoder.IsValidFile() { if !decoder.IsValidFile() {
log.Fatal("Invalid WAV file") logger.Error("Invalid WAV file")
os.Exit(1)
} }
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 { if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels", logger.Error("WAV file must be mono 8kHz",
decoder.Format().SampleRate, decoder.Format().NumChannels) "sample_rate", decoder.Format().SampleRate,
"channels", decoder.Format().NumChannels)
os.Exit(1)
} }
const fs = 8000 const fs = 8000
winN := int(float64(fs) * float64(*winMs) / 1000.0) winN := int(float64(fs) * float64(*winMs) / 1000.0)
hopN := int(float64(fs) * float64(*hopMs) / 1000.0) hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
if winN <= 0 || hopN <= 0 || hopN > winN { if winN <= 0 || hopN <= 0 || hopN > winN {
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN) logger.Error("Invalid window/hop parameters",
"winN", winN,
"hopN", hopN)
os.Exit(1)
} }
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs) det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs, logger)
buf := &audio.IntBuffer{ buf := &audio.IntBuffer{
Format: &audio.Format{SampleRate: fs, NumChannels: 1}, Format: &audio.Format{SampleRate: fs, NumChannels: 1},
@ -256,11 +367,13 @@ func main() {
sampleCount := 0 sampleCount := 0
startTime := time.Now() startTime := time.Now()
log.Println("Processing WAV file...") logger.Info("Processing WAV file")
for { for {
n, err := decoder.PCMBuffer(buf) n, err := decoder.PCMBuffer(buf)
if err != nil || n == 0 || len(buf.Data) == 0 { if err != nil || n == 0 || len(buf.Data) == 0 {
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs)) logger.Info("Finished processing",
"samples", sampleCount,
"duration_sec", fmt.Sprintf("%.2f", float64(sampleCount)/float64(fs)))
break break
} }
@ -273,9 +386,9 @@ func main() {
for offset := 0; offset <= len(pcm)-winN; offset += hopN { for offset := 0; offset <= len(pcm)-winN; offset += hopN {
win := pcm[offset:min(offset+winN, len(pcm))] win := pcm[offset:min(offset+winN, len(pcm))]
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs)) t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t) event, aFreq, aDur, bFreq, bDur, timestamp := det.stepWindow(win, t)
if event != "" { if event != "" {
fmt.Printf("Detected two-tone sequence:\n") fmt.Printf("Detected two-tone sequence at %s:\n", timestamp.Format(time.RFC3339))
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur) fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur) fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
det.reset() det.reset()
@ -284,6 +397,7 @@ func main() {
} }
} }
// min returns the smaller of two integer values a and b.
func min(a, b int) int { func min(a, b int) int {
if a < b { if a < b {
return a return a