-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathsound_event_detection.py
68 lines (53 loc) · 2.02 KB
/
sound_event_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import pyaudio
from matplotlib import pyplot as plt
import pandas as pd
import sounddevice as sd
from keras_yamnet import params
from keras_yamnet.yamnet import YAMNet, class_names
from keras_yamnet.preprocessing import preprocess_input
from plot import Plotter
if __name__ == "__main__":
    # Live sound-event detection demo: read fixed-size windows from an audio
    # input stream, run each window through YAMNet, and hand the features and
    # the scores of the selected classes to the Plotter.

    ################### SETTINGS ###################
    plt_classes = [0, 132, 420, 494]  # Speech, Music, Explosion, Silence
    class_labels = True
    FORMAT = pyaudio.paFloat32
    CHANNELS = 1
    RATE = params.SAMPLE_RATE
    WIN_SIZE_SEC = 0.975
    CHUNK = int(WIN_SIZE_SEC * RATE)  # samples read per window
    RECORD_SECONDS = 500

    # List the available audio devices so a device index can be chosen for MIC.
    print(sd.query_devices())
    MIC = None  # passed as input_device_index; None leaves the choice to PyAudio

    #################### MODEL #####################
    model = YAMNet(weights='keras_yamnet/yamnet.h5')
    yamnet_classes = class_names('keras_yamnet/yamnet_class_map.csv')

    #################### STREAM ####################
    audio = pyaudio.PyAudio()
    try:
        # start Recording
        stream = audio.open(format=FORMAT,
                            input_device_index=MIC,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        try:
            print("recording...")

            if plt_classes is not None:
                # NOTE(review): class_labels is not consulted on this branch,
                # so labels are always shown for an explicit class selection.
                # Confirm whether that is intended.
                plt_classes_lab = yamnet_classes[plt_classes]
                n_classes = len(plt_classes)
            else:
                # No explicit selection: plot every class.
                plt_classes = list(range(len(yamnet_classes)))
                plt_classes_lab = yamnet_classes if class_labels else None
                n_classes = len(yamnet_classes)

            monitor = Plotter(n_classes=n_classes, FIG_SIZE=(12, 6),
                              msd_labels=plt_classes_lab)

            # Number of CHUNK-sized windows that fit into RECORD_SECONDS.
            n_windows = int(RATE / CHUNK * RECORD_SECONDS)
            for _ in range(n_windows):
                # Waveform
                raw = stream.read(CHUNK)
                # np.fromstring is deprecated for binary data; frombuffer is
                # the replacement. It returns a read-only view of the bytes,
                # so copy to keep the writable array fromstring used to give.
                waveform = np.frombuffer(raw, dtype=np.float32).copy()
                data = preprocess_input(waveform, RATE)

                # predict() receives a batch of one; keep that single result.
                prediction = model.predict(np.expand_dims(data, 0))[0]

                monitor(data.transpose(),
                        np.expand_dims(prediction[plt_classes], -1))

            print("finished recording")
        finally:
            # stop Recording (also when the loop above raises)
            stream.stop_stream()
            stream.close()
    finally:
        # Release PyAudio even if opening the stream failed.
        audio.terminate()