Compare commits

5 Commits

582
main.go
View File

@ -1,292 +1,406 @@
// -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3
package main
import (
"flag"
"fmt"
"log"
"math"
"os"
"time"
"flag"
"fmt"
"log/slog"
"math"
"os"
"time"
"github.com/go-audio/audio"
"github.com/go-audio/wav"
"github.com/go-audio/audio"
"github.com/go-audio/wav"
)
// Command-line flags. Durations are given in milliseconds; the two thresholds
// are compared against the per-window RMS and the peak-bin power ratio.
var (
	wavFile     = flag.String("wav", "", "Path to mono 8kHz WAV file")
	minAms      = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
	minBms      = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
	gapMaxMs    = flag.Int("gap", 5000, "Max gap between A and B (ms)")
	winMs       = flag.Int("win", 100, "Window size (ms)")
	hopMs       = flag.Int("hop", 50, "Hop size (ms)")
	ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
	rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
	verbose     = flag.Bool("verbose", false, "Enable debug logging")
)
// goertzel holds the precomputed parameters of a single-bin Goertzel filter.
type goertzel struct {
	N     int     // number of samples consumed by one Power call
	fs    float64 // sampling rate in Hz
	k     int     // index of the DFT bin nearest to the target frequency
	coeff float64 // recurrence coefficient, 2*cos(2*pi*k/N)
}

// newGoertzel returns a Goertzel filter tuned to the DFT bin closest to
// targetHz.
//
// targetHz is the frequency to detect in Hertz, fs is the sampling rate in
// Hertz and N is the number of samples each Power call processes. The bin
// index is targetHz*N/fs rounded to the nearest integer, so the frequency
// actually measured is k*fs/N rather than targetHz itself.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
	g := &goertzel{N: N, fs: fs}
	g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
	g.coeff = 2.0 * math.Cos(omega)
	return g
}

// Power returns the squared magnitude of bin g.k for the samples in x, computed
// with the Goertzel recurrence.
//
// At most g.N samples are read. When x is shorter than g.N only the available
// samples are used instead of indexing past the end of the slice.
func (g *goertzel) Power(x []float64) float64 {
	n := g.N
	if len(x) < n {
		n = len(x)
	}

	var s1, s2 float64
	for i := 0; i < n; i++ {
		s0 := x[i] + g.coeff*s1 - s2
		s2 = s1
		s1 = s0
	}

	// Combine the last two recurrence states into the real and imaginary parts
	// of the bin value.
	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
	re := s1 - s2*math.Cos(omega)
	im := s2 * math.Sin(omega)
	return re*re + im*im
}
// windowHann multiplies x in place by a symmetric Hann window, tapering both
// ends of the slice to zero to reduce spectral leakage.
//
// Slices with fewer than two samples are left unchanged: the window formula
// divides by len(x)-1, which would turn a single sample into NaN.
func windowHann(x []float64) {
	if len(x) < 2 {
		return
	}
	n := float64(len(x))
	for i := range x {
		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/(n-1.0)))
	}
}
// pcmToFloat converts 16-bit PCM samples to float64 values.
//
// The result always has length N. Samples beyond N are ignored, and when buf
// holds fewer than N samples the remaining elements stay zero.
func pcmToFloat(buf []int16, N int) []float64 {
	out := make([]float64, N)
	n := len(buf)
	if N < n {
		n = N
	}
	for i, v := range buf[:n] {
		out[i] = float64(v)
	}
	return out
}
// rmsPCM returns the root mean square of the 16-bit PCM samples in buf.
// An empty slice yields 0.
func rmsPCM(buf []int16) float64 {
	if len(buf) == 0 {
		return 0
	}
	var sumSq float64
	for _, v := range buf {
		f := float64(v)
		sumSq += f * f
	}
	return math.Sqrt(sumSq / float64(len(buf)))
}
// twoToneDetector tracks a Tone A followed by a Tone B across successive
// analysis windows. The first group of fields is configuration; the remaining
// fields are the state that stepWindow updates and reset clears.
type twoToneDetector struct {
	fs          int         // sample rate in Hz
	winN        int         // window length in samples
	hopN        int         // hop between windows in samples
	ratioThresh float64     // minimum peak-bin power ratio for a window to count as a tone
	rmsThresh   float64     // minimum window RMS for a window to be analysed
	minAms      int         // required accumulated Tone A time (ms)
	minBms      int         // required accumulated Tone B time (ms)
	gapMaxMs    int         // time budget for Tone B once Tone A is confirmed (ms)
	freqs       []float64   // candidate frequencies, parallel to gzBank
	gzBank      []*goertzel // one Goertzel filter per candidate frequency

	inA      bool      // Tone A is currently being accumulated
	aFreq    float64   // frequency of the Tone A candidate
	aAccumMs int       // accumulated Tone A time (ms)
	aStart   time.Time // start of the first Tone A window
	aEnd     time.Time // end of the latest Tone A window

	waitingB    bool      // Tone A is confirmed and Tone B is awaited
	bFreq       float64   // frequency of the Tone B candidate
	bAccumMs    int       // accumulated Tone B time (ms)
	bStart      time.Time // start of the first Tone B window
	bEnd        time.Time // end of the latest Tone B window
	gapRemainMs int       // remaining time budget for Tone B (ms)

	logger *slog.Logger // destination for diagnostic output
}
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
// Frequency range: 3003000 Hz, 10 Hz steps
freqs := make([]float64, 0)
for f := 300.0; f <= 3000.0; f += 10.0 {
freqs = append(freqs, f)
}
gzBank := make([]*goertzel, len(freqs))
for i, f := range freqs {
gzBank[i] = newGoertzel(f, float64(fs), winN)
}
return &twoToneDetector{
fs: fs,
winN: winN,
hopN: hopN,
ratioThresh: ratioThresh,
rmsThresh: rmsThresh,
minAms: minAms,
minBms: minBms,
gapMaxMs: gapMaxMs,
freqs: freqs,
gzBank: gzBank,
}
// newTwoToneDetector creates and initializes a new twoToneDetector instance.
// It sets up a bank of Goertzel filters for frequencies ranging from 300 Hz to 3000 Hz in 10 Hz steps.
// Parameters:
//
// fs - Sample rate in Hz.
// winN - Window size for analysis.
// hopN - Hop size between windows.
// ratioThresh- Threshold for tone ratio detection.
// rmsThresh - RMS threshold for signal detection.
// minAms - Minimum duration of tone A in milliseconds.
// minBms - Minimum duration of tone B in milliseconds.
// gapMaxMs - Maximum allowed gap between tones in milliseconds.
// logger - Logger for diagnostic output.
//
// Returns:
//
// Pointer to a twoToneDetector configured with the specified parameters.
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int, logger *slog.Logger) *twoToneDetector {
freqs := make([]float64, 0)
for f := 300.0; f <= 3000.0; f += 10.0 {
freqs = append(freqs, f)
}
gzBank := make([]*goertzel, len(freqs))
for i, f := range freqs {
gzBank[i] = newGoertzel(f, float64(fs), winN)
}
return &twoToneDetector{
fs: fs,
winN: winN,
hopN: hopN,
ratioThresh: ratioThresh,
rmsThresh: rmsThresh,
minAms: minAms,
minBms: minBms,
gapMaxMs: gapMaxMs,
freqs: freqs,
gzBank: gzBank,
logger: logger,
}
}
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
xi := pcmToFloat(pcms, d.winN)
windowHann(xi)
// stepWindow processes a window of PCM audio samples to detect a two-tone sequence.
// It applies a Hann window to the samples, computes RMS and power ratios, and tracks
// the presence and duration of two distinct tones (A and B) according to configured thresholds.
// The function returns an event string (e.g., "TWO_TONE_DETECTED") when a valid two-tone
// sequence is detected, along with the frequencies and durations (in milliseconds) of both tones,
// and the timestamp of detection.
//
// Parameters:
//
// pcms - Slice of int16 PCM audio samples for the current window.
// t0 - Start time of the current window.
//
// Returns:
//
// event - Event string indicating detection status (e.g., "TWO_TONE_DETECTED" or "").
// aFreq - Frequency of detected Tone A (Hz).
// aDur - Duration of Tone A (milliseconds).
// bFreq - Frequency of detected Tone B (Hz).
// bDur - Duration of Tone B (milliseconds).
// timestamp - Timestamp of detection (time.Time). Zero value if no event detected.
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64, timestamp time.Time) {
xi := pcmToFloat(pcms, d.winN)
windowHann(xi)
var total float64
for _, v := range xi {
total += v * v
}
r := rmsPCM(pcms)
if r < d.rmsThresh {
d.reset()
return "", 0, 0, 0, 0
}
// Find frequency with highest power
bestIdx := -1
bestPow := 0.0
for i, gz := range d.gzBank {
p := gz.Power(xi)
if p > bestPow {
bestPow = p
bestIdx = i
var total float64
for _, v := range xi {
total += v * v
}
}
ratio := bestPow / (total + 1e-12)
if ratio < d.ratioThresh {
d.reset()
return "", 0, 0, 0, 0
}
freq := d.freqs[bestIdx]
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
now := t0
r := rmsPCM(pcms)
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
now := t0
if !d.inA && !d.waitingB {
// Looking for Tone A
d.inA = true
d.aFreq = freq
d.aAccumMs = int(hopDur.Milliseconds())
d.aStart = now
} else if d.inA && !d.waitingB {
// Confirming Tone A
if math.Abs(freq-d.aFreq) <= 10.0 {
d.aAccumMs += int(hopDur.Milliseconds())
if d.aAccumMs >= d.minAms {
d.inA = false
d.waitingB = true
d.gapRemainMs = d.gapMaxMs
}
} else {
d.reset()
if r < d.rmsThresh {
d.logger.Debug("RMS below threshold, resetting",
"time", now.Format(time.RFC3339),
"rms", fmt.Sprintf("%.2f", r),
"threshold", d.rmsThresh)
d.reset()
return "", 0, 0, 0, 0, time.Time{}
}
} else if d.waitingB {
d.gapRemainMs -= int(hopDur.Milliseconds())
if d.gapRemainMs <= 0 {
d.reset()
} else if math.Abs(freq-d.aFreq) > 10.0 {
// Check for Tone B (different frequency)
if d.bAccumMs == 0 {
d.bFreq = freq
d.bStart = now
} else if math.Abs(freq-d.bFreq) > 10.0 {
// Switched to a different frequency, reset B
d.bFreq = freq
d.bAccumMs = 0
d.bStart = now
}
d.bAccumMs += int(hopDur.Milliseconds())
if d.bAccumMs >= d.minBms {
event = "TWO_TONE_DETECTED"
return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
}
// Find frequency with highest power
bestIdx := -1
bestPow := 0.0
for i, gz := range d.gzBank {
p := gz.Power(xi)
if p > bestPow {
bestPow = p
bestIdx = i
}
}
}
return "", 0, 0, 0, 0
ratio := bestPow / (total + 1e-12)
if ratio < d.ratioThresh {
d.logger.Debug("Ratio below threshold, resetting",
"time", now.Format(time.RFC3339),
"ratio", fmt.Sprintf("%.3f", ratio),
"threshold", d.ratioThresh)
d.reset()
return "", 0, 0, 0, 0, time.Time{}
}
freq := d.freqs[bestIdx]
if !d.inA && !d.waitingB {
// Looking for Tone A
d.inA = true
d.aFreq = freq
d.aStart = now
} else if d.inA && !d.waitingB {
// Confirming Tone A
if math.Abs(freq-d.aFreq) <= 10.0 {
d.aAccumMs += int(hopDur.Milliseconds())
d.aEnd = now.Add(hopDur)
if d.aAccumMs >= d.minAms {
d.inA = false
d.waitingB = true
d.gapRemainMs = d.gapMaxMs
}
} else {
d.logger.Debug("Frequency differs from Tone A, resetting",
"time", now.Format(time.RFC3339),
"freq", fmt.Sprintf("%.1f", freq),
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq))
d.reset()
}
} else if d.waitingB {
d.gapRemainMs -= int(hopDur.Milliseconds())
if d.gapRemainMs <= 0 {
d.logger.Debug("Gap exceeded max duration, resetting",
"time", now.Format(time.RFC3339),
"gap_max_ms", d.gapMaxMs)
d.reset()
} else if math.Abs(freq-d.aFreq) > 10.0 {
// Check for Tone B
if d.bAccumMs == 0 {
d.bFreq = freq
d.bStart = now
} else if math.Abs(freq-d.bFreq) > 10.0 {
d.logger.Debug("Frequency differs from Tone B, resetting B",
"time", now.Format(time.RFC3339),
"freq", fmt.Sprintf("%.1f", freq),
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq))
d.bFreq = freq
d.bAccumMs = 0
d.bStart = now
}
d.bAccumMs += int(hopDur.Milliseconds())
d.bEnd = now.Add(hopDur)
if d.bAccumMs >= d.minBms {
event = "TWO_TONE_DETECTED"
aDurMs := float64(d.aEnd.Sub(d.aStart).Milliseconds())
bDurMs := float64(d.bEnd.Sub(d.bStart).Milliseconds())
d.logger.Info("Two-tone detected",
"time", now.Format(time.RFC3339),
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq),
"tone_a_duration_ms", fmt.Sprintf("%.0f", aDurMs),
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq),
"tone_b_duration_ms", fmt.Sprintf("%.0f", bDurMs))
return event, d.aFreq, aDurMs, d.bFreq, bDurMs, now
}
}
}
return "", 0, 0, 0, 0, time.Time{}
}
// reset clears all detection state (flags, candidate frequencies, accumulated
// durations, start/end timestamps and the gap budget) so the next window starts
// a fresh search for Tone A. Configuration fields are left untouched.
func (d *twoToneDetector) reset() {
	// Tone A state.
	d.inA = false
	d.aFreq = 0
	d.aAccumMs = 0
	d.aStart = time.Time{}
	d.aEnd = time.Time{}

	// Tone B state.
	d.waitingB = false
	d.bFreq = 0
	d.bAccumMs = 0
	d.bStart = time.Time{}
	d.bEnd = time.Time{}
	d.gapRemainMs = 0
}
func main() {
flag.Parse()
if *wavFile == "" {
log.Fatal("WAV file path is required (use -wav flag)")
}
flag.Parse()
file, err := os.Open(*wavFile)
if err != nil {
log.Fatalf("Failed to open WAV file: %v", err)
}
defer file.Close()
// Initialize slog logger
logLevel := &slog.LevelVar{}
logLevel.Set(slog.LevelInfo)
if *verbose {
logLevel.Set(slog.LevelDebug)
}
logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
Level: logLevel,
}))
decoder := wav.NewDecoder(file)
if !decoder.IsValidFile() {
log.Fatal("Invalid WAV file")
}
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
decoder.Format().SampleRate, decoder.Format().NumChannels)
}
const fs = 8000
winN := int(float64(fs) * float64(*winMs) / 1000.0)
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
if winN <= 0 || hopN <= 0 || hopN > winN {
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
}
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
buf := &audio.IntBuffer{
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
Data: make([]int, 8192),
SourceBitDepth: 16,
}
sampleCount := 0
startTime := time.Now()
log.Println("Processing WAV file...")
for {
n, err := decoder.PCMBuffer(buf)
if err != nil || n == 0 || len(buf.Data) == 0 {
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
break
if *wavFile == "" {
logger.Error("WAV file path is required", "flag", "-wav")
os.Exit(1)
}
pcm := make([]int16, n)
for i, v := range buf.Data[:n] {
pcm[i] = int16(v)
file, err := os.Open(*wavFile)
if err != nil {
logger.Error("Failed to open WAV file", "error", err)
os.Exit(1)
}
sampleCount += n
defer file.Close()
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
win := pcm[offset:min(offset+winN, len(pcm))]
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
if event != "" {
fmt.Printf("Detected two-tone sequence:\n")
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
det.reset()
}
decoder := wav.NewDecoder(file)
if !decoder.IsValidFile() {
logger.Error("Invalid WAV file")
os.Exit(1)
}
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
logger.Error("WAV file must be mono 8kHz",
"sample_rate", decoder.Format().SampleRate,
"channels", decoder.Format().NumChannels)
os.Exit(1)
}
const fs = 8000
winN := int(float64(fs) * float64(*winMs) / 1000.0)
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
if winN <= 0 || hopN <= 0 || hopN > winN {
logger.Error("Invalid window/hop parameters",
"winN", winN,
"hopN", hopN)
os.Exit(1)
}
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs, logger)
buf := &audio.IntBuffer{
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
Data: make([]int, 8192),
SourceBitDepth: 16,
}
sampleCount := 0
startTime := time.Now()
logger.Info("Processing WAV file")
for {
n, err := decoder.PCMBuffer(buf)
if err != nil || n == 0 || len(buf.Data) == 0 {
logger.Info("Finished processing",
"samples", sampleCount,
"duration_sec", fmt.Sprintf("%.2f", float64(sampleCount)/float64(fs)))
break
}
pcm := make([]int16, n)
for i, v := range buf.Data[:n] {
pcm[i] = int16(v)
}
sampleCount += n
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
win := pcm[offset:min(offset+winN, len(pcm))]
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
event, aFreq, aDur, bFreq, bDur, timestamp := det.stepWindow(win, t)
if event != "" {
fmt.Printf("Detected two-tone sequence at %s:\n", timestamp.Format(time.RFC3339))
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
det.reset()
}
}
}
}
}
// min returns the smaller of two integer values a and b.
// Within this package it takes precedence over the built-in min of newer Go
// releases.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}