Add initial implementation of two-tone detector with WAV file support and required dependencies

This commit is contained in:
2025-08-15 18:24:11 -04:00
commit aec3310ebf
7 changed files with 308 additions and 0 deletions

BIN
audio/call001.wav Normal file

Binary file not shown.

BIN
audio/call002.wav Normal file

Binary file not shown.

BIN
audio/call003.wav Normal file

Binary file not shown.

BIN
audio/output.wav Normal file

Binary file not shown.

10
go.mod Normal file
View File

@ -0,0 +1,10 @@
module git.savin.nyc/alex/go-two-tone-detector-wav
go 1.25
require (
github.com/go-audio/audio v1.0.0
github.com/go-audio/wav v1.1.0
)
require github.com/go-audio/riff v1.0.0 // indirect

6
go.sum Normal file
View File

@ -0,0 +1,6 @@
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=

292
main.go Normal file
View File

@ -0,0 +1,292 @@
// Example flags: -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3
package main
import (
"flag"
"fmt"
"log"
"math"
"os"
"time"
"github.com/go-audio/audio"
"github.com/go-audio/wav"
)
// Command-line flags. All durations are expressed in milliseconds; main
// converts the window and hop sizes into sample counts at its fixed 8 kHz
// sample rate and hands the remaining values to newTwoToneDetector.
var (
// wavFile is the input path; main refuses files that are not mono 8 kHz.
wavFile = flag.String("wav", "", "Path to mono 8kHz WAV file")
// minAms and minBms are the accumulated durations Tone A and Tone B must
// reach before they count as detected.
minAms = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
minBms = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
// gapMaxMs is the time budget that starts once Tone A has been confirmed.
gapMaxMs = flag.Int("gap", 5000, "Max gap between A and B (ms)")
// winMs is the analysis window length and hopMs the step between successive
// windows; main requires 0 < hop <= window after conversion to samples.
winMs = flag.Int("win", 100, "Window size (ms)")
hopMs = flag.Int("hop", 50, "Hop size (ms)")
// ratioThresh is compared against the strongest frequency's power relative
// to the energy of the whole window.
ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
// rmsThresh is compared against the RMS of the raw PCM sample values.
rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
)
// goertzel measures signal power at one target frequency with the Goertzel
// recurrence, which is cheaper than a full FFT when only a handful of
// frequencies are of interest.
type goertzel struct {
	N     int     // number of samples in one analysis window
	fs    float64 // sample rate in Hz
	k     int     // index of the DFT bin nearest to the target frequency
	coeff float64 // recurrence coefficient, 2*cos(omega) for the target frequency
}

// newGoertzel returns a detector for targetHz on windows of N samples taken at
// fs Hz.
//
// The recurrence is tuned to the exact target frequency rather than to the
// nearest integer DFT bin. When fs/N does not divide the target evenly (any
// window other than a multiple of the frequency grid), rounding to a bin makes
// neighbouring targets collapse onto the same frequency; the exact form keeps
// them distinct. For targets that already sit on a bin the two are identical.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
	g := &goertzel{N: N, fs: fs}
	g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
	omega := 2.0 * math.Pi * targetHz / fs
	g.coeff = 2.0 * math.Cos(omega)
	return g
}

// Power returns the squared magnitude of the signal's spectrum at the target
// frequency, computed over at most the first N samples of x.
//
// A slice shorter than N is treated as if it were zero-padded: trailing zeros
// only rotate the phase of the result, so stopping early yields the same
// magnitude without indexing past the end of x.
func (g *goertzel) Power(x []float64) float64 {
	n := g.N
	if len(x) < n {
		n = len(x)
	}
	var s1, s2 float64
	for i := 0; i < n; i++ {
		s0 := x[i] + g.coeff*s1 - s2
		s2, s1 = s1, s0
	}
	// |s1 - e^{-j*omega}*s2|^2 written with cos(omega) = coeff/2 only:
	// (s1 - s2*cos)^2 + s2^2*sin^2, where sin^2 = 1 - cos^2. Both terms are
	// non-negative, so rounding can never produce a negative power.
	c := g.coeff / 2
	re := s1 - s2*c
	return re*re + s2*s2*(1-c*c)
}
// windowHann multiplies x in place by a symmetric Hann window of the same
// length, tapering both ends towards zero to reduce spectral leakage.
//
// Slices with fewer than two samples are left untouched: the window formula
// divides by len(x)-1, which for a single sample would evaluate 0/0 and turn
// the sample into NaN.
func windowHann(x []float64) {
	if len(x) < 2 {
		return
	}
	last := float64(len(x)) - 1.0
	for i := range x {
		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/last))
	}
}
// pcmToFloat converts 16-bit PCM samples to float64 and returns a slice of
// exactly N values. Input beyond N samples is ignored; if buf is shorter than
// N the remainder of the result stays zero.
func pcmToFloat(buf []int16, N int) []float64 {
	samples := make([]float64, N)
	limit := len(buf)
	if N < limit {
		limit = N
	}
	for idx, s := range buf[:limit] {
		samples[idx] = float64(s)
	}
	return samples
}
// rmsPCM returns the root-mean-square of the raw sample values in buf, or 0
// for an empty slice.
func rmsPCM(buf []int16) float64 {
	if len(buf) == 0 {
		return 0
	}
	sumSq := 0.0
	for i := range buf {
		sample := float64(buf[i])
		sumSq += sample * sample
	}
	return math.Sqrt(sumSq / float64(len(buf)))
}
// twoToneDetector recognises a Tone A followed by a different Tone B. It holds
// the fixed analysis configuration, one Goertzel filter per candidate
// frequency, and the progress made so far through the A -> B sequence.
type twoToneDetector struct {
	// Configuration, set once by newTwoToneDetector.
	fs, winN, hopN         int     // sample rate (Hz), window and hop length (samples)
	ratioThresh, rmsThresh float64 // tonality and loudness thresholds
	minAms, minBms         int     // required Tone A / Tone B durations (ms)
	gapMaxMs               int     // time budget granted once Tone A is confirmed (ms)

	// Candidate frequencies in Hz and the filter for each; same indexing.
	freqs  []float64
	gzBank []*goertzel

	// Tone A progress: whether A is being accumulated, its frequency, the
	// time accumulated so far and the timestamp of its first window.
	inA      bool
	aFreq    float64
	aAccumMs int
	aStart   time.Time

	// Tone B progress: whether A is confirmed and B is awaited, plus the same
	// bookkeeping as for Tone A and the remaining time budget.
	waitingB    bool
	bFreq       float64
	bAccumMs    int
	bStart      time.Time
	gapRemainMs int
}
// newTwoToneDetector builds a detector with the given sample rate, window and
// hop sizes (in samples), thresholds and timing limits (in milliseconds). It
// prepares one Goertzel filter for every candidate frequency from 300 Hz to
// 3000 Hz in 10 Hz steps; the sequence state starts out idle.
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
	// Frequency range: 300-3000 Hz inclusive, 10 Hz steps.
	const (
		lowHz  = 300
		highHz = 3000
		stepHz = 10
	)
	count := (highHz-lowHz)/stepHz + 1

	det := &twoToneDetector{
		fs:          fs,
		winN:        winN,
		hopN:        hopN,
		ratioThresh: ratioThresh,
		rmsThresh:   rmsThresh,
		minAms:      minAms,
		minBms:      minBms,
		gapMaxMs:    gapMaxMs,
		freqs:       make([]float64, count),
		gzBank:      make([]*goertzel, count),
	}
	for i := range det.freqs {
		hz := float64(lowHz + i*stepHz)
		det.freqs[i] = hz
		det.gzBank[i] = newGoertzel(hz, float64(fs), winN)
	}
	return det
}
// stepWindow feeds one analysis window to the detector and advances the
// Tone A -> Tone B state machine by one hop.
//
// pcms holds the window's samples (zero-padded to winN if shorter) and t0 is
// stored as the start time of any tone that begins in this window. When a
// complete sequence is recognised the event "TWO_TONE_DETECTED" is returned
// together with the Tone A and Tone B frequencies in Hz and their accumulated
// durations in milliseconds; otherwise every result is zero. The state is left
// as-is after a detection, so the caller must call reset before continuing.
//
// A window that is too quiet or not tonal enough resets the detector, even
// while Tone B is awaited. gapMaxMs therefore bounds the time between Tone A
// being confirmed and Tone B reaching minBms, not a stretch of silence.
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
	// Loudness gate first: no point analysing a window that is too quiet.
	if rmsPCM(pcms) < d.rmsThresh {
		d.reset()
		return "", 0, 0, 0, 0
	}

	xi := pcmToFloat(pcms, d.winN)
	windowHann(xi)
	var total float64
	for _, v := range xi {
		total += v * v
	}

	// Find the candidate frequency with the highest power.
	bestIdx, bestPow := -1, 0.0
	for i, gz := range d.gzBank {
		if p := gz.Power(xi); p > bestPow {
			bestPow, bestIdx = p, i
		}
	}
	// No bin with positive power (all-zero or NaN window) means there is no
	// tone to track; indexing freqs with -1 would panic. A window shorter than
	// two samples cannot be normalised below.
	if bestIdx < 0 || d.winN < 2 {
		d.reset()
		return "", 0, 0, 0, 0
	}

	// Goertzel yields a raw squared DFT magnitude, which for a sinusoid grows
	// with the square of the window length while the window energy grows only
	// linearly, so bestPow/total is on the order of winN instead of a 0..1
	// fraction. For the symmetric Hann window applied above, sum(w) is
	// (winN-1)/2 and sum(w^2) is 3(winN-1)/8, which makes a pure on-bin
	// sinusoid come out at (winN-1)/3. Scaling by 3/(winN-1) maps a clean tone
	// to about 1 and broadband noise to nearly 0, so ratioThresh acts as a
	// purity fraction.
	purity := bestPow * 3.0 / (float64(d.winN-1) * (total + 1e-12))
	if purity < d.ratioThresh {
		d.reset()
		return "", 0, 0, 0, 0
	}

	freq := d.freqs[bestIdx]
	// Each accepted window accounts for one hop of elapsed time, in whole ms.
	hopMs := int(float64(d.hopN) * 1000.0 / float64(d.fs))

	switch {
	case d.waitingB:
		// Tone A is confirmed; the budget runs down on every tonal window.
		d.gapRemainMs -= hopMs
		switch {
		case d.gapRemainMs <= 0:
			d.reset()
		case math.Abs(freq-d.aFreq) > 10.0:
			// A frequency other than Tone A: start Tone B, or restart it if
			// the frequency moved away from the current B candidate.
			if d.bAccumMs == 0 || math.Abs(freq-d.bFreq) > 10.0 {
				d.bFreq = freq
				d.bAccumMs = 0
				d.bStart = t0
			}
			d.bAccumMs += hopMs
			if d.bAccumMs >= d.minBms {
				return "TWO_TONE_DETECTED", d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
			}
		}
	case d.inA:
		// Tone A must stay within 10 Hz of where it started.
		if math.Abs(freq-d.aFreq) > 10.0 {
			d.reset()
			break
		}
		d.aAccumMs += hopMs
		if d.aAccumMs >= d.minAms {
			d.inA = false
			d.waitingB = true
			d.gapRemainMs = d.gapMaxMs
		}
	default:
		// Idle: this window becomes the first window of a Tone A candidate.
		d.inA = true
		d.aFreq = freq
		d.aAccumMs = hopMs
		d.aStart = t0
	}
	return "", 0, 0, 0, 0
}
// reset discards all Tone A / Tone B progress and returns the detector to its
// idle state. Configuration and the filter bank are left untouched.
func (d *twoToneDetector) reset() {
	d.inA, d.waitingB = false, false
	d.aFreq, d.bFreq = 0, 0
	d.aAccumMs, d.bAccumMs = 0, 0
	d.aStart, d.bStart = time.Time{}, time.Time{}
	d.gapRemainMs = 0
}
// main decodes the WAV file named by -wav, slides the analysis window over its
// samples hop by hop and prints every two-tone sequence the detector reports.
func main() {
	flag.Parse()
	if *wavFile == "" {
		log.Fatal("WAV file path is required (use -wav flag)")
	}

	file, err := os.Open(*wavFile)
	if err != nil {
		log.Fatalf("Failed to open WAV file: %v", err)
	}
	defer file.Close()

	decoder := wav.NewDecoder(file)
	if !decoder.IsValidFile() {
		log.Fatal("Invalid WAV file")
	}
	format := decoder.Format()
	if format.SampleRate != 8000 || format.NumChannels != 1 {
		log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
			format.SampleRate, format.NumChannels)
	}

	const fs = 8000
	winN := int(float64(fs) * float64(*winMs) / 1000.0)
	hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
	if winN <= 0 || hopN <= 0 || hopN > winN {
		log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
	}

	det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)

	buf := &audio.IntBuffer{
		Format:         &audio.Format{SampleRate: fs, NumChannels: 1},
		Data:           make([]int, 8192),
		SourceBitDepth: 16,
	}

	// Samples wider than 16 bits are scaled down instead of being truncated,
	// which would wrap around and corrupt the signal.
	shift := 0
	if decoder.BitDepth > 16 {
		shift = int(decoder.BitDepth) - 16
	}

	// pending carries samples that have been decoded but not yet stepped past.
	// Keeping the tail of each read lets windows span read boundaries, so the
	// hop stays constant and no samples are skipped between reads.
	pending := make([]int16, 0, len(buf.Data)+winN)
	pendingStart := 0 // absolute sample index of pending[0]
	sampleCount := 0
	startTime := time.Now()

	log.Println("Processing WAV file...")
	for {
		n, err := decoder.PCMBuffer(buf)
		if err != nil {
			// Report the failure instead of presenting it as a normal end.
			log.Printf("Failed to decode WAV data: %v", err)
			break
		}
		if n == 0 || len(buf.Data) == 0 {
			break
		}
		for _, v := range buf.Data[:n] {
			pending = append(pending, int16(v>>shift))
		}
		sampleCount += n

		offset := 0
		for ; offset+winN <= len(pending); offset += hopN {
			win := pending[offset : offset+winN]
			t := startTime.Add(time.Duration(pendingStart+offset) * time.Second / time.Duration(fs))
			event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
			if event != "" {
				fmt.Printf("Detected two-tone sequence:\n")
				fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
				fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
				det.reset()
			}
		}

		// Drop what has been stepped past; offset never exceeds len(pending)
		// because hopN <= winN.
		pendingStart += offset
		kept := copy(pending, pending[offset:])
		pending = pending[:kept]
	}
	log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
}
// min returns the smaller of a and b. It is declared at package level and so
// takes precedence over the builtin of the same name throughout this package.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}