Compare commits
2 Commits
main
...
feature-va
Author | SHA1 | Date | |
---|---|---|---|
8e91b0067f | |||
265c142edb |
@ -1,2 +1,5 @@
|
||||
rtsp_url: "rtsp://10.10.10.104:8554/union"
|
||||
output_file: "output.wav"
|
||||
record_segment_duration: "30m" # Duration for each recording segment
|
||||
vad_mode: 1
|
||||
frame_ms: 10
|
1
go.mod
1
go.mod
@ -5,6 +5,7 @@ go 1.25
|
||||
require (
|
||||
github.com/bluenviron/gortsplib/v4 v4.16.2
|
||||
github.com/bluenviron/mediacommon/v2 v2.4.1
|
||||
github.com/maxhawkins/go-webrtcvad v0.0.0-20210121163624-be60036f3083
|
||||
github.com/pion/rtp v1.8.21
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
2
go.sum
2
go.sum
@ -6,6 +6,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/maxhawkins/go-webrtcvad v0.0.0-20210121163624-be60036f3083 h1:0JDcvP4R28p6+u8VIHCwYx7UwiHZ074INz3C397oc9s=
|
||||
github.com/maxhawkins/go-webrtcvad v0.0.0-20210121163624-be60036f3083/go.mod h1:YdrZ05xnooeP54y7m+/UvI23O1Td46PjWkLJu1VLObM=
|
||||
github.com/pion/logging v0.2.4 h1:tTew+7cmQ+Mc1pTBLKH2puKsOvhm32dROumOZ655zB8=
|
||||
github.com/pion/logging v0.2.4/go.mod h1:DffhXTKYdNZU+KtJ5pyQDjvOAh/GsNSyv1lbkFbe3so=
|
||||
github.com/pion/randutil v0.1.0 h1:CFG1UdESneORglEsnimhUjf33Rwjubwj6xfiOXBa3mA=
|
||||
|
362
main.go
362
main.go
@ -6,21 +6,28 @@ import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/bluenviron/gortsplib/v4"
|
||||
"github.com/bluenviron/gortsplib/v4/pkg/base"
|
||||
"github.com/bluenviron/gortsplib/v4/pkg/description"
|
||||
"github.com/bluenviron/gortsplib/v4/pkg/format"
|
||||
"github.com/bluenviron/mediacommon/v2/pkg/codecs/g711"
|
||||
"github.com/maxhawkins/go-webrtcvad"
|
||||
"github.com/pion/rtp"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Config holds RTSP URL and output file path
|
||||
// Config holds RTSP URL, output file path, rotation settings, and VAD settings
|
||||
type Config struct {
|
||||
RTSPURL string `yaml:"rtsp_url"`
|
||||
OutputFile string `yaml:"output_file"`
|
||||
RTSPURL string `yaml:"rtsp_url"`
|
||||
OutputFile string `yaml:"output_file"`
|
||||
RecordSegmentDuration string `yaml:"record_segment_duration"`
|
||||
VADMode int `yaml:"vad_mode"`
|
||||
FrameMs int `yaml:"frame_ms"`
|
||||
}
|
||||
|
||||
// Event represents lifecycle events (e.g., source ready, recording started)
|
||||
@ -44,17 +51,35 @@ type Data struct {
|
||||
Pkt *rtp.Packet
|
||||
}
|
||||
|
||||
// Recorder saves packets to a file
|
||||
// Recorder saves packets to a file with rotation
|
||||
type Recorder struct {
|
||||
name string
|
||||
path *Path
|
||||
file *os.File
|
||||
writer *bufio.Writer
|
||||
isMuLaw bool // true for μ-law, false for A-law
|
||||
dataChan chan *Data
|
||||
terminate chan struct{}
|
||||
sampleRate int
|
||||
channels int
|
||||
name string
|
||||
path *Path
|
||||
file *os.File
|
||||
writer *bufio.Writer
|
||||
isMuLaw bool // true for μ-law, false for A-law
|
||||
dataChan chan *Data
|
||||
terminate chan struct{}
|
||||
sampleRate int
|
||||
channels int
|
||||
outputBase string // Base filename (without timestamp) // Added
|
||||
segmentDur time.Duration // Added
|
||||
segmentStart time.Time // Added
|
||||
totalDataSize uint32 // Added (moved from local variable)
|
||||
fileMu sync.Mutex // Protects file operations // Added
|
||||
}
|
||||
|
||||
// VADReader detects speech in audio packets using WebRTC VAD
|
||||
type VADReader struct {
|
||||
name string
|
||||
path *Path
|
||||
isMuLaw bool
|
||||
dataChan chan *Data
|
||||
terminate chan struct{}
|
||||
vad *webrtcvad.VAD
|
||||
sampleRate int
|
||||
frameMs int
|
||||
segmentStart time.Time // Tracks start of current audio/silence segment
|
||||
}
|
||||
|
||||
// NewPath creates a stream hub
|
||||
@ -103,26 +128,38 @@ func (p *Path) SourceReady(audioMedia *description.Media, audioFormat format.For
|
||||
}
|
||||
|
||||
// NewRecorder creates a recorder that subscribes to the path
|
||||
func NewRecorder(path *Path, outputFile string, isMuLaw bool) (*Recorder, error) {
|
||||
file, err := os.Create(outputFile)
|
||||
func NewRecorder(path *Path, outputFile string, isMuLaw bool, segmentDur time.Duration) (*Recorder, error) {
|
||||
// Initialize with a timestamped filename
|
||||
fileName := formatFileName(outputFile, time.Now()) // Added
|
||||
file, err := os.Create(fileName) // Modified
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create file: %w", err)
|
||||
return nil, fmt.Errorf("create file %s: %w", fileName, err) // Modified
|
||||
}
|
||||
r := &Recorder{
|
||||
name: "recorder",
|
||||
path: path,
|
||||
file: file,
|
||||
writer: bufio.NewWriter(file),
|
||||
isMuLaw: isMuLaw,
|
||||
dataChan: make(chan *Data, 100), // Buffered channel
|
||||
terminate: make(chan struct{}),
|
||||
sampleRate: 8000, // Fixed for G.711
|
||||
channels: 1, // Mono
|
||||
name: "recorder",
|
||||
path: path,
|
||||
file: file,
|
||||
writer: bufio.NewWriter(file),
|
||||
isMuLaw: isMuLaw,
|
||||
dataChan: make(chan *Data, 100), // Buffered channel
|
||||
terminate: make(chan struct{}),
|
||||
sampleRate: 8000, // Fixed for G.711
|
||||
channels: 1, // Mono
|
||||
outputBase: outputFile, // Added
|
||||
segmentDur: segmentDur, // Added
|
||||
segmentStart: time.Now(), // Added
|
||||
}
|
||||
path.AddReader(r.name, r.dataChan)
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// formatFileName inserts a timestamp between the stem and the extension of
// baseName, e.g. "output.wav" becomes "output_2025-08-19_10-24-00.wav".
// A baseName without an extension simply gets "_<timestamp>" appended.
func formatFileName(baseName string, t time.Time) string {
	suffix := filepath.Ext(baseName)
	stem, _ := strings.CutSuffix(baseName, suffix)
	stamp := t.Format("2006-01-02_15-04-05")
	// All operands are strings, so Sprint concatenates them without spaces.
	return fmt.Sprint(stem, "_", stamp, suffix)
}
|
||||
|
||||
// writeWAVHeader writes a basic WAV header for PCM audio
|
||||
func (r *Recorder) writeWAVHeader(dataSize uint32) error {
|
||||
header := make([]byte, 44)
|
||||
@ -143,6 +180,50 @@ func (r *Recorder) writeWAVHeader(dataSize uint32) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// rotateFile closes the current file and opens a new one
|
||||
func (r *Recorder) rotateFile() error {
|
||||
r.fileMu.Lock()
|
||||
defer r.fileMu.Unlock()
|
||||
|
||||
// Flush and close current file
|
||||
if err := r.writer.Flush(); err != nil {
|
||||
return fmt.Errorf("flush file: %w", err)
|
||||
}
|
||||
if _, err := r.file.Seek(4, 0); err != nil {
|
||||
return fmt.Errorf("seek file: %w", err)
|
||||
}
|
||||
var buf [4]byte
|
||||
binary.LittleEndian.PutUint32(buf[:], 36+r.totalDataSize)
|
||||
r.file.Write(buf[:])
|
||||
if _, err := r.file.Seek(40, 0); err != nil {
|
||||
return fmt.Errorf("seek file: %w", err)
|
||||
}
|
||||
binary.LittleEndian.PutUint32(buf[:], r.totalDataSize)
|
||||
r.file.Write(buf[:])
|
||||
if err := r.file.Close(); err != nil {
|
||||
return fmt.Errorf("close file: %w", err)
|
||||
}
|
||||
|
||||
// Open new file
|
||||
fileName := formatFileName(r.outputBase, time.Now())
|
||||
file, err := os.Create(fileName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create new file %s: %w", fileName, err)
|
||||
}
|
||||
r.file = file
|
||||
r.writer = bufio.NewWriter(file)
|
||||
r.totalDataSize = 0
|
||||
r.segmentStart = time.Now()
|
||||
|
||||
// Write new WAV header
|
||||
if err := r.writeWAVHeader(0); err != nil {
|
||||
return fmt.Errorf("write new header: %w", err)
|
||||
}
|
||||
slog.Info("Rotated to new file", "filename", fileName)
|
||||
r.path.eventChan <- Event{Type: "segment_complete", Data: fileName}
|
||||
return nil
|
||||
}
|
||||
|
||||
// muLawToPCM converts μ-law samples to 16-bit PCM
|
||||
func muLawToPCM(sample byte) int16 {
|
||||
const muLawBias = 33
|
||||
@ -193,11 +274,134 @@ func (r *Recorder) Start() {
|
||||
if err := r.writeWAVHeader(0); err != nil {
|
||||
slog.Error("Write header error", "error", err)
|
||||
}
|
||||
var totalDataSize uint32
|
||||
r.totalDataSize = 0 // Moved from local variable to struct field
|
||||
|
||||
go func() {
|
||||
defer r.file.Close()
|
||||
defer r.writer.Flush()
|
||||
for {
|
||||
select {
|
||||
case data := <-r.dataChan:
|
||||
if data.Media.Type == "audio" {
|
||||
// Check for rotation
|
||||
if time.Since(r.segmentStart) >= r.segmentDur { // Added
|
||||
if err := r.rotateFile(); err != nil {
|
||||
slog.Error("File rotation error", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Unmarshal G.711 payload
|
||||
var samples []byte
|
||||
var err error
|
||||
if r.isMuLaw {
|
||||
var mu g711.Mulaw
|
||||
mu.Unmarshal(data.Pkt.Payload)
|
||||
samples = []byte(mu)
|
||||
} else {
|
||||
var al g711.Alaw
|
||||
al.Unmarshal(data.Pkt.Payload)
|
||||
samples = []byte(al)
|
||||
}
|
||||
if err != nil {
|
||||
slog.Error("Unmarshal audio error", "error", err)
|
||||
continue
|
||||
}
|
||||
// Convert to 16-bit PCM and write
|
||||
r.fileMu.Lock() // Added
|
||||
for _, sample := range samples {
|
||||
var pcmSample int16
|
||||
if r.isMuLaw {
|
||||
pcmSample = muLawToPCM(sample)
|
||||
} else {
|
||||
pcmSample = aLawToPCM(sample)
|
||||
}
|
||||
var buf [2]byte
|
||||
binary.LittleEndian.PutUint16(buf[:], uint16(pcmSample))
|
||||
_, err := r.writer.Write(buf[:])
|
||||
if err != nil {
|
||||
slog.Error("Write error", "error", err)
|
||||
}
|
||||
r.totalDataSize += 2
|
||||
}
|
||||
r.fileMu.Unlock() // Added
|
||||
}
|
||||
case <-r.terminate:
|
||||
// Update WAV header with final data size
|
||||
r.fileMu.Lock() // Added
|
||||
if err := r.writer.Flush(); err != nil {
|
||||
slog.Error("Flush error", "error", err)
|
||||
}
|
||||
if _, err := r.file.Seek(4, 0); err != nil {
|
||||
slog.Error("Seek error", "error", err)
|
||||
return
|
||||
}
|
||||
var buf [4]byte
|
||||
binary.LittleEndian.PutUint32(buf[:], 36+r.totalDataSize)
|
||||
r.file.Write(buf[:])
|
||||
if _, err := r.file.Seek(40, 0); err != nil {
|
||||
slog.Error("Seek error", "error", err)
|
||||
return
|
||||
}
|
||||
binary.LittleEndian.PutUint32(buf[:], r.totalDataSize)
|
||||
r.file.Write(buf[:])
|
||||
r.fileMu.Unlock() // Added
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Stop removes the recorder's reader registration from its path and then
// closes the terminate channel, which signals the goroutine launched by Start
// to patch the WAV header sizes and exit. Stop does not wait for that
// goroutine to finish, and calling it a second time panics because the
// channel is already closed.
|
||||
func (r *Recorder) Stop() {
|
||||
r.path.RemoveReader(r.name)
|
||||
close(r.terminate)
|
||||
}
|
||||
|
||||
// NewVADReader creates a VAD reader that subscribes to the path
|
||||
func NewVADReader(path *Path, isMuLaw bool, vadMode, frameMs int) (*VADReader, error) {
|
||||
vad, err := webrtcvad.New()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create VAD: %w", err)
|
||||
}
|
||||
if err := vad.SetMode(vadMode); err != nil {
|
||||
return nil, fmt.Errorf("set VAD mode %d: %w", vadMode, err)
|
||||
}
|
||||
if frameMs != 10 && frameMs != 20 && frameMs != 30 {
|
||||
return nil, fmt.Errorf("invalid frame duration %dms; must be 10, 20, or 30", frameMs)
|
||||
}
|
||||
sampleRate := 8000
|
||||
frameSamples := sampleRate * frameMs / 1000
|
||||
frameBytes := frameSamples * 2 // 16-bit PCM
|
||||
if !vad.ValidRateAndFrameLength(sampleRate, frameBytes) {
|
||||
return nil, fmt.Errorf("invalid VAD parameters: sample_rate=%d, frame_bytes=%d", sampleRate, frameBytes)
|
||||
}
|
||||
|
||||
r := &VADReader{
|
||||
name: "vad_reader",
|
||||
path: path,
|
||||
isMuLaw: isMuLaw,
|
||||
dataChan: make(chan *Data, 100), // Buffered channel
|
||||
terminate: make(chan struct{}),
|
||||
vad: vad,
|
||||
sampleRate: sampleRate,
|
||||
frameMs: frameMs,
|
||||
segmentStart: time.Now(),
|
||||
}
|
||||
path.AddReader(r.name, r.dataChan)
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Start runs the VAD reader's async processing loop
|
||||
func (r *VADReader) Start() {
|
||||
go func() {
|
||||
var pcmBuffer []byte
|
||||
var isSilent = true
|
||||
var audioStart time.Time // Added to track audio duration
|
||||
const minSilenceDuration = 4 * time.Second // Changed from 2s
|
||||
const maxSilenceDuration = 6 * time.Second // Added
|
||||
|
||||
frameSamples := r.sampleRate * r.frameMs / 1000
|
||||
frameBytes := frameSamples * 2 // 16-bit PCM
|
||||
for {
|
||||
select {
|
||||
case data := <-r.dataChan:
|
||||
@ -218,7 +422,7 @@ func (r *Recorder) Start() {
|
||||
slog.Error("Unmarshal audio error", "error", err)
|
||||
continue
|
||||
}
|
||||
// Convert to 16-bit PCM and write
|
||||
// Convert to 16-bit PCM
|
||||
for _, sample := range samples {
|
||||
var pcmSample int16
|
||||
if r.isMuLaw {
|
||||
@ -228,39 +432,56 @@ func (r *Recorder) Start() {
|
||||
}
|
||||
var buf [2]byte
|
||||
binary.LittleEndian.PutUint16(buf[:], uint16(pcmSample))
|
||||
_, err := r.writer.Write(buf[:])
|
||||
pcmBuffer = append(pcmBuffer, buf[:]...)
|
||||
}
|
||||
|
||||
// Process VAD frames
|
||||
for len(pcmBuffer) >= frameBytes {
|
||||
frame := pcmBuffer[:frameBytes]
|
||||
pcmBuffer = pcmBuffer[frameBytes:]
|
||||
|
||||
active, err := r.vad.Process(r.sampleRate, frame)
|
||||
if err != nil {
|
||||
slog.Error("Write error", "error", err)
|
||||
slog.Warn("VAD processing error", "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
if active && isSilent {
|
||||
slog.Info("Speech detected", "timestamp", now.Format("2006-01-02 15:04:05"))
|
||||
r.path.eventChan <- Event{Type: "speech_detected", Data: now}
|
||||
isSilent = false
|
||||
r.segmentStart = now
|
||||
audioStart = now // Added
|
||||
} else if !active && !isSilent {
|
||||
if r.segmentStart.IsZero() {
|
||||
r.segmentStart = now
|
||||
} else if now.Sub(r.segmentStart) >= minSilenceDuration && now.Sub(r.segmentStart) <= maxSilenceDuration {
|
||||
var audioDurationMs int64
|
||||
if !audioStart.IsZero() {
|
||||
audioDurationMs = now.Sub(audioStart).Milliseconds() // Added
|
||||
}
|
||||
slog.Info("Silence detected",
|
||||
"timestamp", now.Format("2006-01-02 15:04:05"),
|
||||
"silence_duration_ms", now.Sub(r.segmentStart).Milliseconds(),
|
||||
"audio_duration_ms", audioDurationMs) // Added
|
||||
r.path.eventChan <- Event{Type: "silence_detected", Data: now}
|
||||
isSilent = true
|
||||
r.segmentStart = time.Time{} // Modified: reset to allow new silence detection
|
||||
audioStart = time.Time{} // Added
|
||||
}
|
||||
}
|
||||
totalDataSize += 2
|
||||
}
|
||||
}
|
||||
case <-r.terminate:
|
||||
// Update WAV header with actual data size
|
||||
if err := r.writer.Flush(); err != nil {
|
||||
slog.Error("Flush error", "error", err)
|
||||
}
|
||||
if _, err := r.file.Seek(4, 0); err != nil {
|
||||
slog.Error("Seek error", "error", err)
|
||||
return
|
||||
}
|
||||
var buf [4]byte
|
||||
binary.LittleEndian.PutUint32(buf[:], 36+totalDataSize)
|
||||
r.file.Write(buf[:])
|
||||
if _, err := r.file.Seek(40, 0); err != nil {
|
||||
slog.Error("Seek error", "error", err)
|
||||
return
|
||||
}
|
||||
binary.LittleEndian.PutUint32(buf[:], totalDataSize)
|
||||
r.file.Write(buf[:])
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Stop terminates the recorder
|
||||
func (r *Recorder) Stop() {
|
||||
// Stop terminates the VAD reader
|
||||
func (r *VADReader) Stop() {
|
||||
r.path.RemoveReader(r.name)
|
||||
close(r.terminate)
|
||||
}
|
||||
@ -285,6 +506,35 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Parse segment duration (default to 5 minutes)
|
||||
segmentDur := 5 * time.Minute
|
||||
if conf.RecordSegmentDuration != "" {
|
||||
dur, err := time.ParseDuration(conf.RecordSegmentDuration)
|
||||
if err != nil {
|
||||
slog.Error("Invalid record_segment_duration", "value", conf.RecordSegmentDuration, "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
segmentDur = dur
|
||||
}
|
||||
|
||||
// Parse VAD settings (default: mode=3, frame_ms=20)
|
||||
vadMode := 3
|
||||
frameMs := 20
|
||||
if conf.VADMode != 0 {
|
||||
if conf.VADMode < 0 || conf.VADMode > 3 {
|
||||
slog.Error("Invalid vad_mode", "value", conf.VADMode, "allowed", "0-3")
|
||||
os.Exit(1)
|
||||
}
|
||||
vadMode = conf.VADMode
|
||||
}
|
||||
if conf.FrameMs != 0 {
|
||||
if conf.FrameMs != 10 && conf.FrameMs != 20 && conf.FrameMs != 30 {
|
||||
slog.Error("Invalid frame_ms", "value", conf.FrameMs, "allowed", "10, 20, 30")
|
||||
os.Exit(1)
|
||||
}
|
||||
frameMs = conf.FrameMs
|
||||
}
|
||||
|
||||
// Step 2: Set up event bus
|
||||
eventChan := make(chan Event, 10)
|
||||
go func() {
|
||||
@ -343,17 +593,25 @@ func main() {
|
||||
|
||||
// Step 5: Set up recorder (choose μ-law or A-law based on format)
|
||||
isMuLaw := audioFormat.MULaw
|
||||
recorder, err := NewRecorder(path, conf.OutputFile, isMuLaw)
|
||||
recorder, err := NewRecorder(path, conf.OutputFile, isMuLaw, segmentDur) // Modified: added segmentDur
|
||||
if err != nil {
|
||||
slog.Error("Create recorder", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
recorder.Start()
|
||||
|
||||
// Step 6: Signal source ready
|
||||
// Step 6: Set up VAD reader
|
||||
vadReader, err := NewVADReader(path, isMuLaw, vadMode, frameMs)
|
||||
if err != nil {
|
||||
slog.Error("Create VAD reader", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
vadReader.Start()
|
||||
|
||||
// Step 7: Signal source ready
|
||||
path.SourceReady(audioMedia, audioFormat)
|
||||
|
||||
// Step 7: Read RTP packets and distribute
|
||||
// Step 8: Read RTP packets and distribute
|
||||
go func() {
|
||||
c.OnPacketRTP(audioMedia, audioFormat, func(pkt *rtp.Packet) {
|
||||
path.DistributeData(&Data{Media: audioMedia, Pkt: pkt})
|
||||
|
Reference in New Issue
Block a user