Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
8e7840e504 | |||
00d5b5f017 | |||
89b1999048 | |||
bd64dd24dc | |||
b670654a4b |
582
main.go
582
main.go
@ -1,292 +1,406 @@
|
||||
// Example flags: -wav=output.wav -minA=500 -minB=2000 -rms=10 -ratio=0.3
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"time"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/go-audio/audio"
|
||||
"github.com/go-audio/wav"
|
||||
"github.com/go-audio/audio"
|
||||
"github.com/go-audio/wav"
|
||||
)
|
||||
|
||||
// Command-line flags. Each flag is registered exactly once; registering the
// same name twice makes the flag package panic at start-up.
var (
	wavFile     = flag.String("wav", "", "Path to mono 8kHz WAV file")
	minAms      = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
	minBms      = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
	gapMaxMs    = flag.Int("gap", 5000, "Max gap between A and B (ms)")
	winMs       = flag.Int("win", 100, "Window size (ms)")
	hopMs       = flag.Int("hop", 50, "Hop size (ms)")
	ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
	rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
	verbose     = flag.Bool("verbose", false, "Enable debug logging")
)
|
||||
|
||||
// goertzel holds the precomputed parameters of a single-bin Goertzel filter.
type goertzel struct {
	N     int     // number of samples consumed per Power call
	fs    float64 // sampling rate in Hz
	k     int     // index of the DFT bin nearest to the target frequency
	coeff float64 // recurrence coefficient, 2*cos(2*pi*k/N)
}

// newGoertzel returns a Goertzel filter tuned to the DFT bin closest to
// targetHz.
//
// targetHz is the frequency to detect in Hertz, fs is the sampling rate in
// Hertz and N is the number of samples analysed per call to Power. The bin
// index is targetHz*N/fs rounded to the nearest integer. N and fs are expected
// to be positive; they are not validated here.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
	k := int(0.5 + (float64(N)*targetHz)/fs)
	omega := (2.0 * math.Pi * float64(k)) / float64(N)
	return &goertzel{
		N:     N,
		fs:    fs,
		k:     k,
		coeff: 2.0 * math.Cos(omega),
	}
}

// Power returns the squared magnitude of bin g.k for the samples in x.
//
// At most g.N samples are read. If x is shorter than g.N only the available
// samples are processed instead of indexing past the end of the slice.
func (g *goertzel) Power(x []float64) float64 {
	n := g.N
	if len(x) < n {
		n = len(x)
	}

	// s1 and s2 are the two most recent states of the Goertzel recurrence.
	var s1, s2 float64
	for i := 0; i < n; i++ {
		s1, s2 = x[i]+g.coeff*s1-s2, s1
	}

	// |s1 - s2*e^{-j*omega}|^2 expanded with coeff = 2*cos(omega); this avoids
	// recomputing sin/cos on every call.
	return s1*s1 + s2*s2 - g.coeff*s1*s2
}
|
||||
|
||||
// windowHann multiplies x in place by a symmetric Hann window of the same
// length, tapering both ends of the slice to zero to reduce spectral leakage.
//
// Slices with fewer than two samples are left untouched: the window formula
// divides by len(x)-1, which would turn a single sample into NaN.
func windowHann(x []float64) {
	if len(x) < 2 {
		return
	}
	last := float64(len(x) - 1)
	for i := range x {
		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/last))
	}
}
|
||||
|
||||
// pcmToFloat converts 16-bit PCM samples to float64 values.
//
// The result always has length N: the first min(N, len(buf)) elements are the
// converted samples and any remaining elements are zero. A negative N is
// treated as zero instead of panicking in make.
func pcmToFloat(buf []int16, N int) []float64 {
	if N < 0 {
		N = 0
	}
	out := make([]float64, N)

	limit := len(buf)
	if N < limit {
		limit = N
	}
	for i, s := range buf[:limit] {
		out[i] = float64(s)
	}
	return out
}
|
||||
|
||||
// rmsPCM returns the root mean square of the 16-bit PCM samples in buf,
// expressed in raw sample units. An empty slice yields 0.
func rmsPCM(buf []int16) float64 {
	if len(buf) == 0 {
		return 0
	}

	var sumSq float64
	for _, s := range buf {
		f := float64(s)
		sumSq += f * f
	}
	return math.Sqrt(sumSq / float64(len(buf)))
}
|
||||
|
||||
// twoToneDetector for detecting tone sequences
|
||||
type twoToneDetector struct {
|
||||
fs int
|
||||
winN int
|
||||
hopN int
|
||||
ratioThresh float64
|
||||
rmsThresh float64
|
||||
minAms int
|
||||
minBms int
|
||||
gapMaxMs int
|
||||
freqs []float64
|
||||
gzBank []*goertzel
|
||||
inA bool
|
||||
aFreq float64
|
||||
aAccumMs int
|
||||
aStart time.Time
|
||||
waitingB bool
|
||||
bFreq float64
|
||||
bAccumMs int
|
||||
bStart time.Time
|
||||
gapRemainMs int
|
||||
fs int
|
||||
winN int
|
||||
hopN int
|
||||
ratioThresh float64
|
||||
rmsThresh float64
|
||||
minAms int
|
||||
minBms int
|
||||
gapMaxMs int
|
||||
freqs []float64
|
||||
gzBank []*goertzel
|
||||
inA bool
|
||||
aFreq float64
|
||||
aAccumMs int
|
||||
aStart time.Time
|
||||
aEnd time.Time
|
||||
waitingB bool
|
||||
bFreq float64
|
||||
bAccumMs int
|
||||
bStart time.Time
|
||||
bEnd time.Time
|
||||
gapRemainMs int
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
|
||||
// Frequency range: 300–3000 Hz, 10 Hz steps
|
||||
freqs := make([]float64, 0)
|
||||
for f := 300.0; f <= 3000.0; f += 10.0 {
|
||||
freqs = append(freqs, f)
|
||||
}
|
||||
gzBank := make([]*goertzel, len(freqs))
|
||||
for i, f := range freqs {
|
||||
gzBank[i] = newGoertzel(f, float64(fs), winN)
|
||||
}
|
||||
return &twoToneDetector{
|
||||
fs: fs,
|
||||
winN: winN,
|
||||
hopN: hopN,
|
||||
ratioThresh: ratioThresh,
|
||||
rmsThresh: rmsThresh,
|
||||
minAms: minAms,
|
||||
minBms: minBms,
|
||||
gapMaxMs: gapMaxMs,
|
||||
freqs: freqs,
|
||||
gzBank: gzBank,
|
||||
}
|
||||
// newTwoToneDetector creates and initializes a new twoToneDetector instance.
|
||||
// It sets up a bank of Goertzel filters for frequencies ranging from 300 Hz to 3000 Hz in 10 Hz steps.
|
||||
// Parameters:
|
||||
//
|
||||
// fs - Sample rate in Hz.
|
||||
// winN - Window size for analysis.
|
||||
// hopN - Hop size between windows.
|
||||
// ratioThresh- Threshold for tone ratio detection.
|
||||
// rmsThresh - RMS threshold for signal detection.
|
||||
// minAms - Minimum duration of tone A in milliseconds.
|
||||
// minBms - Minimum duration of tone B in milliseconds.
|
||||
// gapMaxMs - Maximum allowed gap between tones in milliseconds.
|
||||
// logger - Logger for diagnostic output.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// Pointer to a twoToneDetector configured with the specified parameters.
|
||||
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int, logger *slog.Logger) *twoToneDetector {
|
||||
freqs := make([]float64, 0)
|
||||
for f := 300.0; f <= 3000.0; f += 10.0 {
|
||||
freqs = append(freqs, f)
|
||||
}
|
||||
gzBank := make([]*goertzel, len(freqs))
|
||||
for i, f := range freqs {
|
||||
gzBank[i] = newGoertzel(f, float64(fs), winN)
|
||||
}
|
||||
return &twoToneDetector{
|
||||
fs: fs,
|
||||
winN: winN,
|
||||
hopN: hopN,
|
||||
ratioThresh: ratioThresh,
|
||||
rmsThresh: rmsThresh,
|
||||
minAms: minAms,
|
||||
minBms: minBms,
|
||||
gapMaxMs: gapMaxMs,
|
||||
freqs: freqs,
|
||||
gzBank: gzBank,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
|
||||
xi := pcmToFloat(pcms, d.winN)
|
||||
windowHann(xi)
|
||||
// stepWindow processes a window of PCM audio samples to detect a two-tone sequence.
|
||||
// It applies a Hann window to the samples, computes RMS and power ratios, and tracks
|
||||
// the presence and duration of two distinct tones (A and B) according to configured thresholds.
|
||||
// The function returns an event string (e.g., "TWO_TONE_DETECTED") when a valid two-tone
|
||||
// sequence is detected, along with the frequencies and durations (in milliseconds) of both tones,
|
||||
// and the timestamp of detection.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// pcms - Slice of int16 PCM audio samples for the current window.
|
||||
// t0 - Start time of the current window.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// event - Event string indicating detection status (e.g., "TWO_TONE_DETECTED" or "").
|
||||
// aFreq - Frequency of detected Tone A (Hz).
|
||||
// aDur - Duration of Tone A (milliseconds).
|
||||
// bFreq - Frequency of detected Tone B (Hz).
|
||||
// bDur - Duration of Tone B (milliseconds).
|
||||
// timestamp - Timestamp of detection (time.Time). Zero value if no event detected.
|
||||
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64, timestamp time.Time) {
|
||||
xi := pcmToFloat(pcms, d.winN)
|
||||
windowHann(xi)
|
||||
|
||||
var total float64
|
||||
for _, v := range xi {
|
||||
total += v * v
|
||||
}
|
||||
|
||||
r := rmsPCM(pcms)
|
||||
if r < d.rmsThresh {
|
||||
d.reset()
|
||||
return "", 0, 0, 0, 0
|
||||
}
|
||||
|
||||
// Find frequency with highest power
|
||||
bestIdx := -1
|
||||
bestPow := 0.0
|
||||
for i, gz := range d.gzBank {
|
||||
p := gz.Power(xi)
|
||||
if p > bestPow {
|
||||
bestPow = p
|
||||
bestIdx = i
|
||||
var total float64
|
||||
for _, v := range xi {
|
||||
total += v * v
|
||||
}
|
||||
}
|
||||
ratio := bestPow / (total + 1e-12)
|
||||
if ratio < d.ratioThresh {
|
||||
d.reset()
|
||||
return "", 0, 0, 0, 0
|
||||
}
|
||||
freq := d.freqs[bestIdx]
|
||||
|
||||
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
|
||||
now := t0
|
||||
r := rmsPCM(pcms)
|
||||
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
|
||||
now := t0
|
||||
|
||||
if !d.inA && !d.waitingB {
|
||||
// Looking for Tone A
|
||||
d.inA = true
|
||||
d.aFreq = freq
|
||||
d.aAccumMs = int(hopDur.Milliseconds())
|
||||
d.aStart = now
|
||||
} else if d.inA && !d.waitingB {
|
||||
// Confirming Tone A
|
||||
if math.Abs(freq-d.aFreq) <= 10.0 {
|
||||
d.aAccumMs += int(hopDur.Milliseconds())
|
||||
if d.aAccumMs >= d.minAms {
|
||||
d.inA = false
|
||||
d.waitingB = true
|
||||
d.gapRemainMs = d.gapMaxMs
|
||||
}
|
||||
} else {
|
||||
d.reset()
|
||||
if r < d.rmsThresh {
|
||||
d.logger.Debug("RMS below threshold, resetting",
|
||||
"time", now.Format(time.RFC3339),
|
||||
"rms", fmt.Sprintf("%.2f", r),
|
||||
"threshold", d.rmsThresh)
|
||||
d.reset()
|
||||
return "", 0, 0, 0, 0, time.Time{}
|
||||
}
|
||||
} else if d.waitingB {
|
||||
d.gapRemainMs -= int(hopDur.Milliseconds())
|
||||
if d.gapRemainMs <= 0 {
|
||||
d.reset()
|
||||
} else if math.Abs(freq-d.aFreq) > 10.0 {
|
||||
// Check for Tone B (different frequency)
|
||||
if d.bAccumMs == 0 {
|
||||
d.bFreq = freq
|
||||
d.bStart = now
|
||||
} else if math.Abs(freq-d.bFreq) > 10.0 {
|
||||
// Switched to a different frequency, reset B
|
||||
d.bFreq = freq
|
||||
d.bAccumMs = 0
|
||||
d.bStart = now
|
||||
}
|
||||
d.bAccumMs += int(hopDur.Milliseconds())
|
||||
if d.bAccumMs >= d.minBms {
|
||||
event = "TWO_TONE_DETECTED"
|
||||
return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
|
||||
}
|
||||
|
||||
// Find frequency with highest power
|
||||
bestIdx := -1
|
||||
bestPow := 0.0
|
||||
for i, gz := range d.gzBank {
|
||||
p := gz.Power(xi)
|
||||
if p > bestPow {
|
||||
bestPow = p
|
||||
bestIdx = i
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", 0, 0, 0, 0
|
||||
ratio := bestPow / (total + 1e-12)
|
||||
if ratio < d.ratioThresh {
|
||||
d.logger.Debug("Ratio below threshold, resetting",
|
||||
"time", now.Format(time.RFC3339),
|
||||
"ratio", fmt.Sprintf("%.3f", ratio),
|
||||
"threshold", d.ratioThresh)
|
||||
d.reset()
|
||||
return "", 0, 0, 0, 0, time.Time{}
|
||||
}
|
||||
freq := d.freqs[bestIdx]
|
||||
|
||||
if !d.inA && !d.waitingB {
|
||||
// Looking for Tone A
|
||||
d.inA = true
|
||||
d.aFreq = freq
|
||||
d.aStart = now
|
||||
} else if d.inA && !d.waitingB {
|
||||
// Confirming Tone A
|
||||
if math.Abs(freq-d.aFreq) <= 10.0 {
|
||||
d.aAccumMs += int(hopDur.Milliseconds())
|
||||
d.aEnd = now.Add(hopDur)
|
||||
if d.aAccumMs >= d.minAms {
|
||||
d.inA = false
|
||||
d.waitingB = true
|
||||
d.gapRemainMs = d.gapMaxMs
|
||||
}
|
||||
} else {
|
||||
d.logger.Debug("Frequency differs from Tone A, resetting",
|
||||
"time", now.Format(time.RFC3339),
|
||||
"freq", fmt.Sprintf("%.1f", freq),
|
||||
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq))
|
||||
d.reset()
|
||||
}
|
||||
} else if d.waitingB {
|
||||
d.gapRemainMs -= int(hopDur.Milliseconds())
|
||||
if d.gapRemainMs <= 0 {
|
||||
d.logger.Debug("Gap exceeded max duration, resetting",
|
||||
"time", now.Format(time.RFC3339),
|
||||
"gap_max_ms", d.gapMaxMs)
|
||||
d.reset()
|
||||
} else if math.Abs(freq-d.aFreq) > 10.0 {
|
||||
// Check for Tone B
|
||||
if d.bAccumMs == 0 {
|
||||
d.bFreq = freq
|
||||
d.bStart = now
|
||||
} else if math.Abs(freq-d.bFreq) > 10.0 {
|
||||
d.logger.Debug("Frequency differs from Tone B, resetting B",
|
||||
"time", now.Format(time.RFC3339),
|
||||
"freq", fmt.Sprintf("%.1f", freq),
|
||||
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq))
|
||||
d.bFreq = freq
|
||||
d.bAccumMs = 0
|
||||
d.bStart = now
|
||||
}
|
||||
d.bAccumMs += int(hopDur.Milliseconds())
|
||||
d.bEnd = now.Add(hopDur)
|
||||
if d.bAccumMs >= d.minBms {
|
||||
event = "TWO_TONE_DETECTED"
|
||||
aDurMs := float64(d.aEnd.Sub(d.aStart).Milliseconds())
|
||||
bDurMs := float64(d.bEnd.Sub(d.bStart).Milliseconds())
|
||||
d.logger.Info("Two-tone detected",
|
||||
"time", now.Format(time.RFC3339),
|
||||
"tone_a_freq", fmt.Sprintf("%.1f", d.aFreq),
|
||||
"tone_a_duration_ms", fmt.Sprintf("%.0f", aDurMs),
|
||||
"tone_b_freq", fmt.Sprintf("%.1f", d.bFreq),
|
||||
"tone_b_duration_ms", fmt.Sprintf("%.0f", bDurMs))
|
||||
return event, d.aFreq, aDurMs, d.bFreq, bDurMs, now
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", 0, 0, 0, 0, time.Time{}
|
||||
}
|
||||
|
||||
// reset reinitializes all internal state fields of the twoToneDetector,
|
||||
// clearing any ongoing detection data and preparing the detector for a new
|
||||
// detection sequence. This includes resetting flags, frequency values,
|
||||
// accumulated durations, start/end timestamps, and gap tracking.
|
||||
func (d *twoToneDetector) reset() {
|
||||
d.inA = false
|
||||
d.aFreq = 0
|
||||
d.aAccumMs = 0
|
||||
d.aStart = time.Time{}
|
||||
d.waitingB = false
|
||||
d.bFreq = 0
|
||||
d.bAccumMs = 0
|
||||
d.bStart = time.Time{}
|
||||
d.gapRemainMs = 0
|
||||
d.inA = false
|
||||
d.aFreq = 0
|
||||
d.aAccumMs = 0
|
||||
d.aStart = time.Time{}
|
||||
d.aEnd = time.Time{}
|
||||
d.waitingB = false
|
||||
d.bFreq = 0
|
||||
d.bAccumMs = 0
|
||||
d.bStart = time.Time{}
|
||||
d.bEnd = time.Time{}
|
||||
d.gapRemainMs = 0
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
if *wavFile == "" {
|
||||
log.Fatal("WAV file path is required (use -wav flag)")
|
||||
}
|
||||
flag.Parse()
|
||||
|
||||
file, err := os.Open(*wavFile)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to open WAV file: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
// Initialize slog logger
|
||||
logLevel := &slog.LevelVar{}
|
||||
logLevel.Set(slog.LevelInfo)
|
||||
if *verbose {
|
||||
logLevel.Set(slog.LevelDebug)
|
||||
}
|
||||
logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
|
||||
Level: logLevel,
|
||||
}))
|
||||
|
||||
decoder := wav.NewDecoder(file)
|
||||
if !decoder.IsValidFile() {
|
||||
log.Fatal("Invalid WAV file")
|
||||
}
|
||||
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
|
||||
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
|
||||
decoder.Format().SampleRate, decoder.Format().NumChannels)
|
||||
}
|
||||
|
||||
const fs = 8000
|
||||
winN := int(float64(fs) * float64(*winMs) / 1000.0)
|
||||
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
|
||||
if winN <= 0 || hopN <= 0 || hopN > winN {
|
||||
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
|
||||
}
|
||||
|
||||
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
|
||||
|
||||
buf := &audio.IntBuffer{
|
||||
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
|
||||
Data: make([]int, 8192),
|
||||
SourceBitDepth: 16,
|
||||
}
|
||||
sampleCount := 0
|
||||
startTime := time.Now()
|
||||
|
||||
log.Println("Processing WAV file...")
|
||||
for {
|
||||
n, err := decoder.PCMBuffer(buf)
|
||||
if err != nil || n == 0 || len(buf.Data) == 0 {
|
||||
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
|
||||
break
|
||||
if *wavFile == "" {
|
||||
logger.Error("WAV file path is required", "flag", "-wav")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
pcm := make([]int16, n)
|
||||
for i, v := range buf.Data[:n] {
|
||||
pcm[i] = int16(v)
|
||||
file, err := os.Open(*wavFile)
|
||||
if err != nil {
|
||||
logger.Error("Failed to open WAV file", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
sampleCount += n
|
||||
defer file.Close()
|
||||
|
||||
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
|
||||
win := pcm[offset:min(offset+winN, len(pcm))]
|
||||
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
|
||||
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
|
||||
if event != "" {
|
||||
fmt.Printf("Detected two-tone sequence:\n")
|
||||
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
|
||||
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
|
||||
det.reset()
|
||||
}
|
||||
decoder := wav.NewDecoder(file)
|
||||
if !decoder.IsValidFile() {
|
||||
logger.Error("Invalid WAV file")
|
||||
os.Exit(1)
|
||||
}
|
||||
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
|
||||
logger.Error("WAV file must be mono 8kHz",
|
||||
"sample_rate", decoder.Format().SampleRate,
|
||||
"channels", decoder.Format().NumChannels)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
const fs = 8000
|
||||
winN := int(float64(fs) * float64(*winMs) / 1000.0)
|
||||
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
|
||||
if winN <= 0 || hopN <= 0 || hopN > winN {
|
||||
logger.Error("Invalid window/hop parameters",
|
||||
"winN", winN,
|
||||
"hopN", hopN)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs, logger)
|
||||
|
||||
buf := &audio.IntBuffer{
|
||||
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
|
||||
Data: make([]int, 8192),
|
||||
SourceBitDepth: 16,
|
||||
}
|
||||
sampleCount := 0
|
||||
startTime := time.Now()
|
||||
|
||||
logger.Info("Processing WAV file")
|
||||
for {
|
||||
n, err := decoder.PCMBuffer(buf)
|
||||
if err != nil || n == 0 || len(buf.Data) == 0 {
|
||||
logger.Info("Finished processing",
|
||||
"samples", sampleCount,
|
||||
"duration_sec", fmt.Sprintf("%.2f", float64(sampleCount)/float64(fs)))
|
||||
break
|
||||
}
|
||||
|
||||
pcm := make([]int16, n)
|
||||
for i, v := range buf.Data[:n] {
|
||||
pcm[i] = int16(v)
|
||||
}
|
||||
sampleCount += n
|
||||
|
||||
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
|
||||
win := pcm[offset:min(offset+winN, len(pcm))]
|
||||
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
|
||||
event, aFreq, aDur, bFreq, bDur, timestamp := det.stepWindow(win, t)
|
||||
if event != "" {
|
||||
fmt.Printf("Detected two-tone sequence at %s:\n", timestamp.Format(time.RFC3339))
|
||||
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
|
||||
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
|
||||
det.reset()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// min returns the smaller of two integer values a and b. Within this package
// it takes precedence over the built-in min available since Go 1.21.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
||||
|
Reference in New Issue
Block a user