Refactor main.go for improved readability and add detailed comments for functions
This commit is contained in:
534
main.go
534
main.go
@ -2,291 +2,353 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-audio/audio"
|
"github.com/go-audio/audio"
|
||||||
"github.com/go-audio/wav"
|
"github.com/go-audio/wav"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Command-line flags. All durations are given in milliseconds.
var (
	// wavFile is the recording to analyse.
	wavFile = flag.String("wav", "", "Path to mono 8kHz WAV file")

	// Timing constraints of the two-tone sequence.
	minAms   = flag.Int("minA", 1000, "Minimum Tone A duration (ms)")
	minBms   = flag.Int("minB", 3000, "Minimum Tone B duration (ms)")
	gapMaxMs = flag.Int("gap", 5000, "Max gap between A and B (ms)")

	// Analysis framing: window length and the step between successive windows.
	winMs = flag.Int("win", 100, "Window size (ms)")
	hopMs = flag.Int("hop", 50, "Hop size (ms)")

	// Per-window acceptance thresholds.
	ratioThresh = flag.Float64("ratio", 0.65, "Power ratio threshold for tone detection")
	rmsThresh   = flag.Float64("rms", 300.0, "Minimum RMS for valid signal")
)
|
||||||
|
|
||||||
// goertzel holds the precomputed parameters of a single-bin Goertzel filter.
type goertzel struct {
	N     int     // number of samples consumed per Power call
	fs    float64 // sampling rate in Hz
	k     int     // DFT bin index nearest to the target frequency
	coeff float64 // recurrence coefficient, 2*cos(2*pi*k/N)
}

// newGoertzel returns a Goertzel filter tuned to the DFT bin closest to
// targetHz.
//
// Parameters:
//
//	targetHz - frequency to detect, in Hz.
//	fs       - sampling rate, in Hz.
//	N        - number of samples analysed per Power call.
//
// The bin index is targetHz*N/fs rounded to the nearest integer, so the
// frequency actually measured is k*fs/N rather than targetHz itself.
func newGoertzel(targetHz float64, fs float64, N int) *goertzel {
	g := &goertzel{N: N, fs: fs}
	g.k = int(0.5 + (float64(g.N)*targetHz)/fs)
	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
	g.coeff = 2.0 * math.Cos(omega)
	return g
}

// Power returns the squared magnitude of bin g.k over the first g.N samples
// of x.
//
// If x holds fewer than g.N samples only the available ones are processed.
// Further zero-valued samples would leave the result unchanged, so this is
// equivalent to zero-padding x to length g.N (and avoids indexing past the
// end of x). Samples beyond g.N are ignored.
func (g *goertzel) Power(x []float64) float64 {
	n := g.N
	if len(x) < n {
		n = len(x)
	}

	// Second-order recurrence: s1 and s2 hold the two most recent states.
	var s1, s2 float64
	for i := 0; i < n; i++ {
		s0 := x[i] + g.coeff*s1 - s2
		s2 = s1
		s1 = s0
	}

	omega := (2.0 * math.Pi * float64(g.k)) / float64(g.N)
	real := s1 - s2*math.Cos(omega)
	imag := s2 * math.Sin(omega)
	return real*real + imag*imag
}
|
||||||
|
|
||||||
|
// windowHann multiplies x in place by a symmetric Hann window, tapering both
// ends of the slice to zero to reduce spectral leakage.
//
// Slices with fewer than two samples are left unchanged: the window formula
// divides by len(x)-1, which would turn a single sample into NaN.
func windowHann(x []float64) {
	if len(x) < 2 {
		return
	}
	last := float64(len(x) - 1)
	for i := range x {
		x[i] *= 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/last))
	}
}
|
||||||
|
|
||||||
|
// pcmToFloat converts up to N 16-bit PCM samples from buf into a float64
// slice of length N.
//
// When buf holds fewer than N samples the remaining entries stay zero, so the
// result is always exactly N long. Sample values are copied unscaled. A
// negative N is treated as zero instead of panicking.
func pcmToFloat(buf []int16, N int) []float64 {
	if N < 0 {
		N = 0
	}
	out := make([]float64, N)

	limit := len(buf)
	if N < limit {
		limit = N
	}
	for i, sample := range buf[:limit] {
		out[i] = float64(sample)
	}
	return out
}
|
||||||
|
|
||||||
|
// rmsPCM returns the root mean square of the 16-bit PCM samples in buf, in
// raw sample units. An empty slice yields 0.
func rmsPCM(buf []int16) float64 {
	if len(buf) == 0 {
		return 0
	}
	var sumSq float64
	for _, sample := range buf {
		f := float64(sample)
		sumSq += f * f
	}
	return math.Sqrt(sumSq / float64(len(buf)))
}
|
||||||
|
|
||||||
// twoToneDetector for detecting tone sequences
|
// twoToneDetector for detecting tone sequences
|
||||||
type twoToneDetector struct {
|
type twoToneDetector struct {
|
||||||
fs int
|
fs int
|
||||||
winN int
|
winN int
|
||||||
hopN int
|
hopN int
|
||||||
ratioThresh float64
|
ratioThresh float64
|
||||||
rmsThresh float64
|
rmsThresh float64
|
||||||
minAms int
|
minAms int
|
||||||
minBms int
|
minBms int
|
||||||
gapMaxMs int
|
gapMaxMs int
|
||||||
freqs []float64
|
freqs []float64
|
||||||
gzBank []*goertzel
|
gzBank []*goertzel
|
||||||
inA bool
|
inA bool
|
||||||
aFreq float64
|
aFreq float64
|
||||||
aAccumMs int
|
aAccumMs int
|
||||||
aStart time.Time
|
aStart time.Time
|
||||||
waitingB bool
|
waitingB bool
|
||||||
bFreq float64
|
bFreq float64
|
||||||
bAccumMs int
|
bAccumMs int
|
||||||
bStart time.Time
|
bStart time.Time
|
||||||
gapRemainMs int
|
gapRemainMs int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// newTwoToneDetector creates and initializes a twoToneDetector instance with the specified parameters.
|
||||||
|
// It sets up a bank of Goertzel filters for detecting tones in the frequency range 300–3000 Hz (in 10 Hz steps).
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
//
|
||||||
|
// fs - Sample rate in Hz.
|
||||||
|
// winN - Window size (number of samples per analysis window).
|
||||||
|
// hopN - Hop size (number of samples to advance per analysis).
|
||||||
|
// ratioThresh- Threshold for the ratio used in tone detection.
|
||||||
|
// rmsThresh - RMS threshold for signal energy.
|
||||||
|
// minAms - Minimum duration of a detected tone in milliseconds.
|
||||||
|
// minBms - Minimum duration of a break between tones in milliseconds.
|
||||||
|
// gapMaxMs - Maximum allowed gap between tones in milliseconds.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// Pointer to an initialized twoToneDetector.
|
||||||
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
|
func newTwoToneDetector(fs, winN, hopN int, ratioThresh, rmsThresh float64, minAms, minBms, gapMaxMs int) *twoToneDetector {
|
||||||
// Frequency range: 300–3000 Hz, 10 Hz steps
|
// Frequency range: 300–3000 Hz, 10 Hz steps
|
||||||
freqs := make([]float64, 0)
|
freqs := make([]float64, 0)
|
||||||
for f := 300.0; f <= 3000.0; f += 10.0 {
|
for f := 300.0; f <= 3000.0; f += 10.0 {
|
||||||
freqs = append(freqs, f)
|
freqs = append(freqs, f)
|
||||||
}
|
}
|
||||||
gzBank := make([]*goertzel, len(freqs))
|
gzBank := make([]*goertzel, len(freqs))
|
||||||
for i, f := range freqs {
|
for i, f := range freqs {
|
||||||
gzBank[i] = newGoertzel(f, float64(fs), winN)
|
gzBank[i] = newGoertzel(f, float64(fs), winN)
|
||||||
}
|
}
|
||||||
return &twoToneDetector{
|
return &twoToneDetector{
|
||||||
fs: fs,
|
fs: fs,
|
||||||
winN: winN,
|
winN: winN,
|
||||||
hopN: hopN,
|
hopN: hopN,
|
||||||
ratioThresh: ratioThresh,
|
ratioThresh: ratioThresh,
|
||||||
rmsThresh: rmsThresh,
|
rmsThresh: rmsThresh,
|
||||||
minAms: minAms,
|
minAms: minAms,
|
||||||
minBms: minBms,
|
minBms: minBms,
|
||||||
gapMaxMs: gapMaxMs,
|
gapMaxMs: gapMaxMs,
|
||||||
freqs: freqs,
|
freqs: freqs,
|
||||||
gzBank: gzBank,
|
gzBank: gzBank,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// stepWindow processes a window of PCM audio samples to detect a two-tone event.
|
||||||
|
// It applies a Hann window, computes the RMS, and searches for the strongest frequency.
|
||||||
|
// The function tracks the presence and duration of two distinct tones (A and B) separated by a gap.
|
||||||
|
// If both tones are detected with sufficient duration and within specified thresholds, it returns
|
||||||
|
// an event string ("TWO_TONE_DETECTED") along with the frequencies and durations of tones A and B.
|
||||||
|
// If detection criteria are not met, it resets the detector state and returns zero values.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
//
|
||||||
|
// pcms []int16 - Slice of PCM audio samples for the current window.
|
||||||
|
// t0 time.Time - Timestamp corresponding to the start of the window.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// event string - Event name if two-tone detected, otherwise empty string.
|
||||||
|
// aFreq float64 - Frequency of tone A (Hz).
|
||||||
|
// aDur float64 - Duration of tone A (milliseconds).
|
||||||
|
// bFreq float64 - Frequency of tone B (Hz).
|
||||||
|
// bDur float64 - Duration of tone B (milliseconds).
|
||||||
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
|
func (d *twoToneDetector) stepWindow(pcms []int16, t0 time.Time) (event string, aFreq, aDur, bFreq, bDur float64) {
|
||||||
xi := pcmToFloat(pcms, d.winN)
|
xi := pcmToFloat(pcms, d.winN)
|
||||||
windowHann(xi)
|
windowHann(xi)
|
||||||
|
|
||||||
var total float64
|
var total float64
|
||||||
for _, v := range xi {
|
for _, v := range xi {
|
||||||
total += v * v
|
total += v * v
|
||||||
}
|
}
|
||||||
|
|
||||||
r := rmsPCM(pcms)
|
r := rmsPCM(pcms)
|
||||||
if r < d.rmsThresh {
|
if r < d.rmsThresh {
|
||||||
d.reset()
|
d.reset()
|
||||||
|
return "", 0, 0, 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find frequency with highest power
|
||||||
|
bestIdx := -1
|
||||||
|
bestPow := 0.0
|
||||||
|
for i, gz := range d.gzBank {
|
||||||
|
p := gz.Power(xi)
|
||||||
|
if p > bestPow {
|
||||||
|
bestPow = p
|
||||||
|
bestIdx = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ratio := bestPow / (total + 1e-12)
|
||||||
|
if ratio < d.ratioThresh {
|
||||||
|
d.reset()
|
||||||
|
return "", 0, 0, 0, 0
|
||||||
|
}
|
||||||
|
freq := d.freqs[bestIdx]
|
||||||
|
|
||||||
|
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
|
||||||
|
now := t0
|
||||||
|
|
||||||
|
if !d.inA && !d.waitingB {
|
||||||
|
// Looking for Tone A
|
||||||
|
d.inA = true
|
||||||
|
d.aFreq = freq
|
||||||
|
d.aAccumMs = int(hopDur.Milliseconds())
|
||||||
|
d.aStart = now
|
||||||
|
} else if d.inA && !d.waitingB {
|
||||||
|
// Confirming Tone A
|
||||||
|
if math.Abs(freq-d.aFreq) <= 10.0 {
|
||||||
|
d.aAccumMs += int(hopDur.Milliseconds())
|
||||||
|
if d.aAccumMs >= d.minAms {
|
||||||
|
d.inA = false
|
||||||
|
d.waitingB = true
|
||||||
|
d.gapRemainMs = d.gapMaxMs
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
d.reset()
|
||||||
|
}
|
||||||
|
} else if d.waitingB {
|
||||||
|
d.gapRemainMs -= int(hopDur.Milliseconds())
|
||||||
|
if d.gapRemainMs <= 0 {
|
||||||
|
d.reset()
|
||||||
|
} else if math.Abs(freq-d.aFreq) > 10.0 {
|
||||||
|
// Check for Tone B (different frequency)
|
||||||
|
if d.bAccumMs == 0 {
|
||||||
|
d.bFreq = freq
|
||||||
|
d.bStart = now
|
||||||
|
} else if math.Abs(freq-d.bFreq) > 10.0 {
|
||||||
|
// Switched to a different frequency, reset B
|
||||||
|
d.bFreq = freq
|
||||||
|
d.bAccumMs = 0
|
||||||
|
d.bStart = now
|
||||||
|
}
|
||||||
|
d.bAccumMs += int(hopDur.Milliseconds())
|
||||||
|
if d.bAccumMs >= d.minBms {
|
||||||
|
event = "TWO_TONE_DETECTED"
|
||||||
|
return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return "", 0, 0, 0, 0
|
return "", 0, 0, 0, 0
|
||||||
}
|
|
||||||
|
|
||||||
// Find frequency with highest power
|
|
||||||
bestIdx := -1
|
|
||||||
bestPow := 0.0
|
|
||||||
for i, gz := range d.gzBank {
|
|
||||||
p := gz.Power(xi)
|
|
||||||
if p > bestPow {
|
|
||||||
bestPow = p
|
|
||||||
bestIdx = i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ratio := bestPow / (total + 1e-12)
|
|
||||||
if ratio < d.ratioThresh {
|
|
||||||
d.reset()
|
|
||||||
return "", 0, 0, 0, 0
|
|
||||||
}
|
|
||||||
freq := d.freqs[bestIdx]
|
|
||||||
|
|
||||||
hopDur := time.Millisecond * time.Duration(int(float64(d.hopN)*1000.0/float64(d.fs)))
|
|
||||||
now := t0
|
|
||||||
|
|
||||||
if !d.inA && !d.waitingB {
|
|
||||||
// Looking for Tone A
|
|
||||||
d.inA = true
|
|
||||||
d.aFreq = freq
|
|
||||||
d.aAccumMs = int(hopDur.Milliseconds())
|
|
||||||
d.aStart = now
|
|
||||||
} else if d.inA && !d.waitingB {
|
|
||||||
// Confirming Tone A
|
|
||||||
if math.Abs(freq-d.aFreq) <= 10.0 {
|
|
||||||
d.aAccumMs += int(hopDur.Milliseconds())
|
|
||||||
if d.aAccumMs >= d.minAms {
|
|
||||||
d.inA = false
|
|
||||||
d.waitingB = true
|
|
||||||
d.gapRemainMs = d.gapMaxMs
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
d.reset()
|
|
||||||
}
|
|
||||||
} else if d.waitingB {
|
|
||||||
d.gapRemainMs -= int(hopDur.Milliseconds())
|
|
||||||
if d.gapRemainMs <= 0 {
|
|
||||||
d.reset()
|
|
||||||
} else if math.Abs(freq-d.aFreq) > 10.0 {
|
|
||||||
// Check for Tone B (different frequency)
|
|
||||||
if d.bAccumMs == 0 {
|
|
||||||
d.bFreq = freq
|
|
||||||
d.bStart = now
|
|
||||||
} else if math.Abs(freq-d.bFreq) > 10.0 {
|
|
||||||
// Switched to a different frequency, reset B
|
|
||||||
d.bFreq = freq
|
|
||||||
d.bAccumMs = 0
|
|
||||||
d.bStart = now
|
|
||||||
}
|
|
||||||
d.bAccumMs += int(hopDur.Milliseconds())
|
|
||||||
if d.bAccumMs >= d.minBms {
|
|
||||||
event = "TWO_TONE_DETECTED"
|
|
||||||
return event, d.aFreq, float64(d.aAccumMs), d.bFreq, float64(d.bAccumMs)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return "", 0, 0, 0, 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// reset reinitializes all internal state fields of the twoToneDetector to their default values.
|
||||||
|
// This includes clearing detection flags, frequencies, accumulated durations, start times, and gap timers.
|
||||||
func (d *twoToneDetector) reset() {
|
func (d *twoToneDetector) reset() {
|
||||||
d.inA = false
|
d.inA = false
|
||||||
d.aFreq = 0
|
d.aFreq = 0
|
||||||
d.aAccumMs = 0
|
d.aAccumMs = 0
|
||||||
d.aStart = time.Time{}
|
d.aStart = time.Time{}
|
||||||
d.waitingB = false
|
d.waitingB = false
|
||||||
d.bFreq = 0
|
d.bFreq = 0
|
||||||
d.bAccumMs = 0
|
d.bAccumMs = 0
|
||||||
d.bStart = time.Time{}
|
d.bStart = time.Time{}
|
||||||
d.gapRemainMs = 0
|
d.gapRemainMs = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
if *wavFile == "" {
|
if *wavFile == "" {
|
||||||
log.Fatal("WAV file path is required (use -wav flag)")
|
log.Fatal("WAV file path is required (use -wav flag)")
|
||||||
}
|
|
||||||
|
|
||||||
file, err := os.Open(*wavFile)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Failed to open WAV file: %v", err)
|
|
||||||
}
|
|
||||||
defer file.Close()
|
|
||||||
|
|
||||||
decoder := wav.NewDecoder(file)
|
|
||||||
if !decoder.IsValidFile() {
|
|
||||||
log.Fatal("Invalid WAV file")
|
|
||||||
}
|
|
||||||
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
|
|
||||||
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
|
|
||||||
decoder.Format().SampleRate, decoder.Format().NumChannels)
|
|
||||||
}
|
|
||||||
|
|
||||||
const fs = 8000
|
|
||||||
winN := int(float64(fs) * float64(*winMs) / 1000.0)
|
|
||||||
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
|
|
||||||
if winN <= 0 || hopN <= 0 || hopN > winN {
|
|
||||||
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
|
|
||||||
}
|
|
||||||
|
|
||||||
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
|
|
||||||
|
|
||||||
buf := &audio.IntBuffer{
|
|
||||||
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
|
|
||||||
Data: make([]int, 8192),
|
|
||||||
SourceBitDepth: 16,
|
|
||||||
}
|
|
||||||
sampleCount := 0
|
|
||||||
startTime := time.Now()
|
|
||||||
|
|
||||||
log.Println("Processing WAV file...")
|
|
||||||
for {
|
|
||||||
n, err := decoder.PCMBuffer(buf)
|
|
||||||
if err != nil || n == 0 || len(buf.Data) == 0 {
|
|
||||||
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pcm := make([]int16, n)
|
file, err := os.Open(*wavFile)
|
||||||
for i, v := range buf.Data[:n] {
|
if err != nil {
|
||||||
pcm[i] = int16(v)
|
log.Fatalf("Failed to open WAV file: %v", err)
|
||||||
}
|
}
|
||||||
sampleCount += n
|
defer file.Close()
|
||||||
|
|
||||||
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
|
decoder := wav.NewDecoder(file)
|
||||||
win := pcm[offset:min(offset+winN, len(pcm))]
|
if !decoder.IsValidFile() {
|
||||||
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
|
log.Fatal("Invalid WAV file")
|
||||||
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
|
}
|
||||||
if event != "" {
|
if decoder.Format().SampleRate != 8000 || decoder.Format().NumChannels != 1 {
|
||||||
fmt.Printf("Detected two-tone sequence:\n")
|
log.Fatalf("WAV file must be mono 8kHz, got %d Hz, %d channels",
|
||||||
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
|
decoder.Format().SampleRate, decoder.Format().NumChannels)
|
||||||
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
|
}
|
||||||
det.reset()
|
|
||||||
}
|
const fs = 8000
|
||||||
|
winN := int(float64(fs) * float64(*winMs) / 1000.0)
|
||||||
|
hopN := int(float64(fs) * float64(*hopMs) / 1000.0)
|
||||||
|
if winN <= 0 || hopN <= 0 || hopN > winN {
|
||||||
|
log.Fatalf("Invalid window/hop: winN=%d, hopN=%d", winN, hopN)
|
||||||
|
}
|
||||||
|
|
||||||
|
det := newTwoToneDetector(fs, winN, hopN, *ratioThresh, *rmsThresh, *minAms, *minBms, *gapMaxMs)
|
||||||
|
|
||||||
|
buf := &audio.IntBuffer{
|
||||||
|
Format: &audio.Format{SampleRate: fs, NumChannels: 1},
|
||||||
|
Data: make([]int, 8192),
|
||||||
|
SourceBitDepth: 16,
|
||||||
|
}
|
||||||
|
sampleCount := 0
|
||||||
|
startTime := time.Now()
|
||||||
|
|
||||||
|
log.Println("Processing WAV file...")
|
||||||
|
for {
|
||||||
|
n, err := decoder.PCMBuffer(buf)
|
||||||
|
if err != nil || n == 0 || len(buf.Data) == 0 {
|
||||||
|
log.Printf("Finished processing %d samples (%.2f seconds)", sampleCount, float64(sampleCount)/float64(fs))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
pcm := make([]int16, n)
|
||||||
|
for i, v := range buf.Data[:n] {
|
||||||
|
pcm[i] = int16(v)
|
||||||
|
}
|
||||||
|
sampleCount += n
|
||||||
|
|
||||||
|
for offset := 0; offset <= len(pcm)-winN; offset += hopN {
|
||||||
|
win := pcm[offset:min(offset+winN, len(pcm))]
|
||||||
|
t := startTime.Add(time.Duration(sampleCount-len(pcm)+offset) * time.Second / time.Duration(fs))
|
||||||
|
event, aFreq, aDur, bFreq, bDur := det.stepWindow(win, t)
|
||||||
|
if event != "" {
|
||||||
|
fmt.Printf("Detected two-tone sequence:\n")
|
||||||
|
fmt.Printf(" Tone A: %.1f Hz, duration %.0f ms\n", aFreq, aDur)
|
||||||
|
fmt.Printf(" Tone B: %.1f Hz, duration %.0f ms\n", bFreq, bDur)
|
||||||
|
det.reset()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// min returns the smaller of a and b; when they are equal it returns b.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}
|
||||||
|
Reference in New Issue
Block a user