Files

293 lines
7.2 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Example flags: -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3
package main
import (
"flag"
"fmt"
"log"
"math"
"os"
"time"
"github.com/go-audio/audio"
"github.com/go-audio/wav"
)
// Command-line flags. Durations are given in milliseconds; main converts the
// window and hop sizes to sample counts at the fixed 8 kHz sample rate.
var (
// wavFile is the input path; main exits with an error when it is empty.
wavFile = flag.String("wav", "", "Path to mono 8kHz WAV file")
// minAms is how long tone A must be accumulated before the detector starts
// waiting for tone B.
minAms = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
// minBms is how long tone B must be accumulated before a detection is reported.
minBms = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
// gapMaxMs is the budget loaded into the detector once tone A is confirmed;
// the detector resets when that budget is used up.
gapMaxMs = flag.Int("gap", 5000, "Max gap between A and B (ms)")
// winMs is the analysis window length; main rejects a hop larger than the window.
winMs = flag.Int("win", 100, "Window size (ms)")
// hopMs is the step between consecutive analysis windows.
hopMs = flag.Int("hop", 50, "Hop size (ms)")
// ratioThresh is compared against (strongest Goertzel bin power) divided by
// (sum of squared windowed samples).
// NOTE(review): that quotient is not divided by the window length, so it is
// not confined to [0, 1] — confirm the intended scale of this threshold.
ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
// rmsThresh is compared against the RMS of the raw int16 samples of a window;
// quieter windows are not analysed for tones.
rmsThresh = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
)
// goertzel measures the power of one DFT bin of a fixed-length block using
// the Goertzel recurrence.
type goertzel struct {
	N     int     // block length in samples
	fs    float64 // sample rate in Hz
	k     int     // index of the DFT bin nearest the requested frequency
	coeff float64 // recurrence coefficient 2*cos(2*pi*k/N)
}

// newGoertzel builds a detector for the DFT bin closest to targetHz, given
// the sample rate fs (Hz) and the block length N (samples). The target is
// rounded to the nearest bin, so the frequency actually measured is k*fs/N.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
	g := &goertzel{N: N, fs: fs}
	g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
	g.coeff = 2.0 * math.Cos(omega)
	return g
}

// Power returns the squared magnitude of bin k over the first N samples of x.
//
// When x holds fewer than N samples only the available ones are used instead
// of indexing past the end of the slice; because the recurrence fed with
// zeros preserves the magnitude, this equals zero-padding x to length N.
// An empty input (or a non-positive N) yields 0.
func (g *goertzel) Power(x []float64) float64 {
	n := g.N
	if len(x) < n {
		n = len(x)
	}
	if n <= 0 {
		return 0
	}

	var s1, s2 float64
	for _, v := range x[:n] {
		s1, s2 = v+g.coeff*s1-s2, s1
	}

	// |X(k)|^2 expressed with the recurrence coefficient only. It is the
	// expansion of (s1 - s2*cos w)^2 + (s2*sin w)^2, so no trigonometric
	// calls are needed per invocation.
	return s1*s1 + s2*s2 - g.coeff*s1*s2
}
// windowHann multiplies x in place by a symmetric Hann window whose first and
// last coefficients are zero.
//
// Slices with fewer than two samples are left untouched: the window formula
// divides by len(x)-1, which for a single sample is 0/0 and would turn the
// sample into NaN. A one-point Hann window is conventionally 1.
func windowHann(x []float64) {
	if len(x) < 2 {
		return
	}
	last := float64(len(x)) - 1.0
	for i := range x {
		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/last))
	}
}
// pcmToFloat returns a new slice of exactly N float64 values holding the
// leading samples of buf. Samples beyond N are ignored; when buf is shorter
// than N the remaining positions stay zero.
func pcmToFloat(buf []int16, N int) []float64 {
	converted := make([]float64, N)
	limit := len(buf)
	if N < limit {
		limit = N
	}
	for idx, sample := range buf[:limit] {
		converted[idx] = float64(sample)
	}
	return converted
}
// rmsPCM returns the root-mean-square of the samples in buf, expressed in raw
// int16 units. An empty buffer yields 0.
func rmsPCM(buf []int16) float64 {
	count := len(buf)
	if count == 0 {
		return 0
	}
	sumSquares := 0.0
	for i := 0; i < count; i++ {
		sample := float64(buf[i])
		sumSquares += sample * sample
	}
	return math.Sqrt(sumSquares / float64(count))
}
// twoToneDetector tracks a "tone A followed by tone B" sequence across
// successive analysis windows. The first group of fields is configuration set
// by newTwoToneDetector; the second group is state mutated by stepWindow and
// cleared by reset.
type twoToneDetector struct {
// fs is the sample rate in Hz; together with hopN it gives the time credited
// to each analysed window.
fs int
// winN is the analysis window length in samples.
winN int
// hopN is the step between consecutive windows in samples.
hopN int
// ratioThresh is the minimum (strongest bin power / total windowed energy)
// for a window to count as a tone.
ratioThresh float64
// rmsThresh is the minimum RMS of the raw samples for a window to be analysed.
rmsThresh float64
// minAms is the accumulated time (ms) tone A needs before it is confirmed.
minAms int
// minBms is the accumulated time (ms) tone B needs before a detection fires.
minBms int
// gapMaxMs is the budget (ms) loaded into gapRemainMs when tone A is confirmed.
gapMaxMs int
// freqs lists the candidate frequencies in Hz; gzBank[i] is the Goertzel
// filter built for freqs[i].
freqs []float64
gzBank []*goertzel
// inA is true while a tone A candidate is being accumulated.
inA bool
// aFreq is the frequency (Hz) of the current tone A candidate.
aFreq float64
// aAccumMs is the time (ms) accumulated for tone A so far.
aAccumMs int
// aStart is the timestamp of the window that started tone A. It is only
// written in this file, never read back.
aStart time.Time
// waitingB is true once tone A is confirmed and tone B is being looked for.
waitingB bool
// bFreq is the frequency (Hz) of the current tone B candidate.
bFreq float64
// bAccumMs is the time (ms) accumulated for tone B so far.
bAccumMs int
// bStart is the timestamp of the window that started the current tone B
// candidate. It is only written in this file, never read back.
bStart time.Time
// gapRemainMs is what is left of the gapMaxMs budget; the detector resets
// when it reaches zero.
gapRemainMs int
}
// newTwoToneDetector returns a detector in its idle state, configured with the
// given sample rate (fs, Hz), window and hop lengths (samples), thresholds and
// durations (ms). It also builds one Goertzel filter per candidate frequency,
// each sized to winN samples.
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
	// Candidate frequencies: 300-3000 Hz inclusive, in 10 Hz steps.
	const (
		lowHz  = 300.0
		highHz = 3000.0
		stepHz = 10.0
	)

	det := new(twoToneDetector)
	det.fs, det.winN, det.hopN = fs, winN, hopN
	det.ratioThresh, det.rmsThresh = ratioThresh, rmsThresh
	det.minAms, det.minBms, det.gapMaxMs = minAms, minBms, gapMaxMs

	// Build the frequency list and its filter bank side by side so that
	// gzBank[i] always corresponds to freqs[i].
	det.freqs = make([]float64, 0)
	for hz := lowHz; hz <= highHz; hz += stepHz {
		det.freqs = append(det.freqs, hz)
		det.gzBank = append(det.gzBank, newGoertzel(hz, float64(fs), winN))
	}
	return det
}
// stepWindow feeds one analysis window into the detector and advances its
// state machine by one hop.
//
// pcms holds the window's samples and t0 is the timestamp recorded as the
// start of a tone when this window begins one. Each analysed window is
// credited with one hop of duration, hopN*1000/fs milliseconds.
//
// It returns event "TWO_TONE_DETECTED" together with the frequency (Hz) and
// accumulated duration (ms) of tone A and tone B once tone B has lasted at
// least minBms; otherwise it returns an empty event and zeros. The detector
// does not reset itself after reporting: the caller must call reset, or the
// following windows will report again.
//
// A silent or non-tonal window resets the detector only while tone A is still
// unconfirmed. Once tone A is confirmed such a window merely consumes the gap
// budget, exactly like a window in which tone A is still sounding, so a pause
// between the tones is tolerated up to gapMaxMs.
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
	hopMs := int(float64(d.hopN) * 1000.0 / float64(d.fs))

	freq, ok := d.dominantTone(pcms)
	if !ok {
		d.noteNonTonal(hopMs)
		return "", 0, 0, 0, 0
	}

	switch {
	case d.waitingB:
		// Tone A is confirmed; every window from here on spends gap budget.
		d.gapRemainMs -= hopMs
		if d.gapRemainMs <= 0 {
			d.reset()
			break
		}
		if math.Abs(freq-d.aFreq) <= 10.0 {
			break // tone A is still sounding
		}
		// A different frequency: start a tone B candidate, or restart it
		// when the frequency moved away from the current candidate.
		if d.bAccumMs == 0 || math.Abs(freq-d.bFreq) > 10.0 {
			d.bFreq = freq
			d.bAccumMs = 0
			d.bStart = t0
		}
		d.bAccumMs += hopMs
		if d.bAccumMs >= d.minBms {
			return "TWO_TONE_DETECTED", d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
		}

	case d.inA:
		// Accumulating tone A; any drift beyond 10 Hz abandons the candidate.
		if math.Abs(freq-d.aFreq) > 10.0 {
			d.reset()
			break
		}
		d.aAccumMs += hopMs
		if d.aAccumMs >= d.minAms {
			d.inA = false
			d.waitingB = true
			d.gapRemainMs = d.gapMaxMs
		}

	default:
		// Idle: this window opens a new tone A candidate.
		d.inA = true
		d.aFreq = freq
		d.aAccumMs = hopMs
		d.aStart = t0
	}
	return "", 0, 0, 0, 0
}

// dominantTone reports the candidate frequency with the strongest Goertzel
// response in the window, and whether the window qualifies as a tone (loud
// enough and sufficiently concentrated in that one bin).
func (d *twoToneDetector) dominantTone(pcms []int16) (freq float64, ok bool) {
	// Cheap loudness gate first, so quiet windows skip the filter bank.
	if rmsPCM(pcms) < d.rmsThresh {
		return 0, false
	}

	xi := pcmToFloat(pcms, d.winN)
	windowHann(xi)
	var total float64
	for _, v := range xi {
		total += v * v
	}

	bestIdx := -1
	bestPow := 0.0
	for i, gz := range d.gzBank {
		if p := gz.Power(xi); p > bestPow {
			bestPow = p
			bestIdx = i
		}
	}
	// No bin produced positive power (e.g. an all-zero window with a zero RMS
	// threshold); indexing freqs with -1 would panic.
	if bestIdx < 0 {
		return 0, false
	}

	// NOTE(review): bestPow is an unnormalised |X(k)|^2 while total is a plain
	// sum of squares, so this quotient grows with the window length instead of
	// staying within [0, 1]. Left unchanged to keep existing -ratio settings
	// meaningful; confirm the intended scale before normalising.
	if bestPow/(total+1e-12) < d.ratioThresh {
		return 0, false
	}
	return d.freqs[bestIdx], true
}

// noteNonTonal accounts for a window that carried no usable tone: it abandons
// an unconfirmed tone A, or spends gap budget once tone A is confirmed and
// resets when that budget is exhausted.
func (d *twoToneDetector) noteNonTonal(hopMs int) {
	if !d.waitingB {
		d.reset()
		return
	}
	d.gapRemainMs -= hopMs
	if d.gapRemainMs <= 0 {
		d.reset()
	}
}
// reset returns the detector to its idle state by clearing all tone A, tone B
// and gap tracking. Configuration and the filter bank are left untouched.
func (d *twoToneDetector) reset() {
	var unset time.Time

	// Tone A tracking.
	d.inA, d.aFreq, d.aAccumMs = false, 0, 0
	d.aStart = unset

	// Tone B tracking and the remaining gap budget.
	d.waitingB, d.bFreq, d.bAccumMs = false, 0, 0
	d.bStart = unset
	d.gapRemainMs = 0
}
// main decodes the WAV file named by -wav, slides an analysis window over its
// samples and prints every two-tone sequence the detector reports.
//
// Samples are carried over between decoder reads, so windows follow one
// continuous hop grid across the whole file. Windowing each read on its own
// would drop the samples left after the last full window of a read and would
// restart the grid at every read boundary.
func main() {
	flag.Parse()
	if *wavFile == "" {
		log.Fatal("WAV file path is required (use -wav flag)")
	}

	file, err := os.Open(*wavFile)
	if err != nil {
		log.Fatalf("Failed to open WAV file: %v", err)
	}
	defer file.Close()

	decoder := wav.NewDecoder(file)
	if !decoder.IsValidFile() {
		log.Fatal("Invalid WAV file")
	}
	format := decoder.Format()
	if format.SampleRate != 8000 || format.NumChannels != 1 {
		log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
			format.SampleRate, format.NumChannels)
	}

	const fs = 8000
	winN := int(float64(fs) * float64(*winMs) / 1000.0)
	hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
	if winN <= 0 || hopN <= 0 || hopN > winN {
		log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
	}

	det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
	buf := &audio.IntBuffer{
		Format:         &audio.Format{SampleRate: fs, NumChannels: 1},
		Data:           make([]int, 8192),
		SourceBitDepth: 16,
	}

	var (
		pending     []int16 // decoded samples not yet passed by the hop grid
		pendingBase int     // absolute sample index of pending[0]
		sampleCount int     // total samples decoded so far
	)
	startTime := time.Now()
	log.Println("Processing WAV file...")

	for {
		n, err := decoder.PCMBuffer(buf)
		if err != nil {
			// Report the failure instead of presenting it as a clean finish.
			log.Printf("Stopped decoding after %d samples: %v", sampleCount, err)
			break
		}
		if n == 0 || len(buf.Data) == 0 {
			break
		}

		// NOTE(review): int16 conversion truncates samples from sources wider
		// than 16 bits — confirm inputs are always 16-bit PCM.
		for _, v := range buf.Data[:n] {
			pending = append(pending, int16(v))
		}
		sampleCount += n

		offset := 0
		for ; offset+winN <= len(pending); offset += hopN {
			win := pending[offset : offset+winN]
			t := startTime.Add(time.Duration(pendingBase+offset) * time.Second / time.Duration(fs))
			event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
			if event != "" {
				fmt.Printf("Detected two-tone sequence:\n")
				fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
				fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
				det.reset()
			}
		}

		// Keep the samples the next window still needs. hopN <= winN
		// guarantees offset never exceeds len(pending).
		kept := copy(pending, pending[offset:])
		pending = pending[:kept]
		pendingBase += offset
	}

	log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
}
// min returns the smaller of a and b; when they are equal it returns that
// shared value.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}