-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaudio_processing.py
42 lines (32 loc) · 1.48 KB
/
audio_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import speech_recognition as sr # For audio processing and recording audio
import wave # To create a .wav file out of the microphone recordings
import whisper # Advanced OpenAI audio transcribing model
import librosa # Converting the .wav file into a floating-point time series
import numpy as np
import re
def nlp():
    """Record one utterance from the microphone and transcribe it with Whisper.

    The captured audio is written to ``captured_audio1.wav`` in the current
    working directory, reloaded as a mono floating-point waveform, and passed
    to the Whisper ``base`` model.

    Returns:
        A ``(sen_type, text)`` tuple. ``sen_type`` is ``"Interrogative"``
        when the transcription ends with a question mark and ``None``
        otherwise. ``text`` is the transcription with every character that
        is neither a word character nor whitespace removed. ``(None, None)``
        is returned when no speech is captured or recognition fails.
    """
    wav_path = "captured_audio1.wav"
    # Whisper interprets a raw NumPy waveform as 16 kHz audio, whereas
    # librosa resamples to 22 050 Hz unless told otherwise; loading at the
    # wrong rate feeds the model time-stretched audio.
    whisper_sample_rate = 16000

    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as source:
            audio_data = recognizer.listen(source, timeout=10)

        # Persist the recording as a mono WAV using the capture's own
        # sample width and rate.
        with wave.open(wav_path, "wb") as wave_file:
            wave_file.setnchannels(1)
            wave_file.setsampwidth(audio_data.sample_width)
            wave_file.setframerate(audio_data.sample_rate)
            wave_file.writeframes(audio_data.frame_data)

        waveform, _ = librosa.load(wav_path, sr=whisper_sample_rate)
        audio_waveform = np.asarray(waveform, dtype=np.float32)

        audio_model = whisper.load_model("base")
        transcription = audio_model.transcribe(audio_waveform, fp16=False)["text"]

        # Strip surrounding whitespace instead of blindly dropping the first
        # character, so a transcription without a leading space keeps its
        # first letter and a trailing space cannot hide a final "?".
        result = transcription.strip()

        # endswith() is safe on an empty transcription, unlike result[-1].
        sen_type = "Interrogative" if result.endswith("?") else None

        text = re.sub(r"[^\w\s]", "", result)
        return sen_type, text
    except sr.WaitTimeoutError:
        # listen() raises this when no speech starts within the timeout.
        print("No speech detected before the listening timeout expired.")
        return None, None
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio.")
        return None, None
    except sr.RequestError as e:
        print(f"Could not request results from OpenAI Whisper API service; {e}")
        return None, None