-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.bak
223 lines (183 loc) · 8.54 KB
/
main.bak
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import sys
import os
import subprocess
import logging
import json
import numpy as np
from PyQt5.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QPushButton, QTextEdit, QProgressBar
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QTimer
import speech_recognition as sr
import google.generativeai as genai
import sounddevice as sd
from scipy.io import wavfile
from scipy import signal
# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
# Configure the Gemini API.
# The API key is read from the GOOGLE_API_KEY environment variable so that the
# secret never lives in source control. The key that used to be hard-coded
# here must be treated as leaked and revoked.
_GEMINI_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not _GEMINI_API_KEY:
    logging.warning("GOOGLE_API_KEY is not set; Gemini requests may fail until it is provided")
genai.configure(api_key=_GEMINI_API_KEY)
# Prompt sent to Gemini for every recognised voice command. The only
# placeholder is {query}, filled in with str.format(); the reply is expected to
# be a JSON object with the keys "bash script" and "description".
PROMPT_TEMPLATE = """
You are given a task where you need to provide a Bash command that can be directly executed in a Bash script. The command should resolve the issue described below, and your response should follow these rules:
1. Provide the commands in the form of bash script to solve the problem.
2. Include a small description explaining what each command does.
3. Return the result in JSON format so that it can be parsed easily.
4. Each JSON object should have the following structure:
- "bash script": The actual Bash script to be executed.
- "description": Just small explanation of what the script does. Don't mention the word script itself in the description.
Here is the issue you need to resolve: {query}
Please return only the JSON object in your response.
Don't include any additional text like ``` or comments in your response.
Return in stringified JSON format only so that it can be converted using python json.loads().
"""
class VoiceAssistant(QMainWindow):
    """Main window: records a voice command, asks Gemini for a script, runs it.

    The window shows a status label, a "Start Listening" button, a progress
    bar that fills while audio is being recorded, and a read-only text area
    that acts as a log of statuses, commands and script output.
    """

    def __init__(self):
        """Build the widgets and wire up the recognition thread and timer."""
        super().__init__()
        self.setWindowTitle("Smart Voice Assistant")
        self.setGeometry(100, 100, 800, 600)
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout(central_widget)

        # Status label
        self.status_label = QLabel("Idle")
        layout.addWidget(self.status_label)

        # Button to start listening
        self.listen_button = QPushButton("Start Listening")
        self.listen_button.clicked.connect(self.start_listening)
        layout.addWidget(self.listen_button)

        # Progress bar
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setValue(0)
        layout.addWidget(self.progress_bar)

        # Integrated terminal
        self.terminal = QTextEdit()
        self.terminal.setReadOnly(True)
        layout.addWidget(self.terminal)

        # Voice recognition thread
        self.voice_thread = VoiceRecognitionThread()
        self.voice_thread.status_update.connect(self.update_status)
        self.voice_thread.command_received.connect(self.process_command)

        # Timer for progress bar
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_progress)
        logging.info("Voice Assistant initialized")

    def start_listening(self):
        """Start a recording, unless one is already in progress."""
        # QThread.start() does nothing on a running thread, so without this
        # guard a second click would only reset the progress bar and leave it
        # out of sync with the recording that is still running.
        if self.voice_thread.isRunning():
            logging.debug("Already listening; ignoring start request")
            return
        logging.info("Starting listening")
        self.progress_bar.setValue(0)
        self.timer.start(40)  # Update every 40ms for smooth progress (4000ms / 100)
        self.voice_thread.start()

    def update_progress(self):
        """Advance the progress bar by one step; stop and reset once full."""
        value = self.progress_bar.value() + 1
        if value > 100:
            self.timer.stop()
            self.progress_bar.setValue(0)
        else:
            self.progress_bar.setValue(value)

    def update_status(self, status):
        """Show *status* in the status label and append it to the terminal."""
        logging.debug(f"Status updated: {status}")
        self.status_label.setText(status)
        self.terminal.append(f"Status: {status}\n")

    @staticmethod
    def _strip_code_fence(text):
        """Return *text* without a surrounding Markdown code fence, if any."""
        cleaned = text.strip()
        if cleaned.startswith("```"):
            # Drop the opening fence line, which may carry a language tag
            # such as ```json; fall back to removing just the backticks when
            # the whole reply is on one line.
            _, newline, remainder = cleaned.partition("\n")
            cleaned = remainder if newline else cleaned[3:]
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
        return cleaned.strip()

    def process_command(self, command):
        """Turn a recognised *command* into a bash script via Gemini and run it.

        The script is written to ``script.sh`` in the current directory and
        executed with ``bash``; stdout (or stderr on a non-zero exit code) is
        appended to the terminal. Every failure is reported in the terminal
        and the log, and the status always returns to "Idle" afterwards.
        """
        logging.info(f"Processing command: {command}")
        self.update_status("Processing")
        # NOTE(review): the API call and the script both run synchronously in
        # this slot, so the window appears to be blocked until they finish.
        try:
            # Use Gemini API to convert command to bash script. The request
            # lives inside the try block so that network/API failures are
            # reported instead of escaping from the Qt slot.
            model = genai.GenerativeModel('gemini-1.5-flash')
            prompt = PROMPT_TEMPLATE.format(query=command)
            response = model.generate_content(prompt)
            raw_reply = response.text
            logging.debug(f"Raw Gemini response: {raw_reply}")

            # The model sometimes wraps its JSON in a Markdown fence despite
            # being told not to; tolerate that before parsing.
            jsondata = json.loads(self._strip_code_fence(raw_reply))
            if not isinstance(jsondata, dict):
                raise ValueError("Gemini response is not a JSON object")
            bash_script = jsondata.get('bash script')
            description = jsondata.get('description')
            if not isinstance(bash_script, str) or not bash_script.strip():
                raise ValueError("Gemini response does not contain a bash script")
            logging.debug(f"Generated bash script: {bash_script}")
            logging.debug(f"Description: {description}")

            # Write bash script to file
            script_path = 'script.sh'
            with open(script_path, 'w', encoding='utf-8') as f:
                f.write(bash_script)

            # Make script executable (same effect as `chmod +x`, without
            # spawning a shell) and run it.
            os.chmod(script_path, os.stat(script_path).st_mode | 0o111)
            # SECURITY: this executes model-generated shell code without any
            # review or sandboxing. Consider asking the user for confirmation
            # before running it.
            result = subprocess.run(['bash', script_path], capture_output=True, text=True)
            output = result.stdout if result.returncode == 0 else result.stderr
            self.terminal.append(f"Command: {command}\nDescription: {description}\nOutput: {output}\n")
            logging.info(f"Command executed. Output: {output}")
        except json.JSONDecodeError:
            error_message = "Error: Invalid JSON response from Gemini API"
            self.terminal.append(error_message + "\n")
            logging.error(error_message)
        except Exception as e:
            error_message = f"Error executing command: {str(e)}"
            self.terminal.append(error_message + "\n")
            logging.error(error_message)
        finally:
            self.update_status("Idle")
class VoiceRecognitionThread(QThread):
    """Worker thread that records audio, cleans it up and transcribes it.

    Signals:
        status_update (str): human-readable progress or error text.
        command_received (str): the transcribed command, emitted on success.
    """

    status_update = pyqtSignal(str)
    command_received = pyqtSignal(str)

    def run(self):
        """Record four seconds of audio and emit the recognised command.

        The processed recording is written to ``processed_audio.wav`` and
        offered to each recognition engine in turn; the first successful
        transcription is emitted through ``command_received``. If every
        engine fails, "Could not understand audio" is emitted through
        ``status_update``.
        """
        self.status_update.emit("Listening")
        logging.info("Listening for audio input")
        try:
            # Record audio
            duration = 4  # seconds
            fs = 44100  # Sample rate
            recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
            sd.wait()
            recording = recording.flatten()

            # Apply audio processing
            processed_audio = self.process_audio(recording, fs)

            # Save processed audio to WAV file. The processed signal is
            # normalised to roughly [-1, 1], so it has to be scaled to the
            # int16 range before the cast; casting the floats directly would
            # truncate every sample to -1, 0 or 1. Clipping guards against
            # filter overshoot wrapping around.
            int16_peak = np.iinfo(np.int16).max
            pcm = np.clip(processed_audio, -1.0, 1.0) * int16_peak
            wavfile.write("processed_audio.wav", fs, pcm.astype(np.int16))

            # Load the file once; every engine transcribes the same audio.
            recognizer = sr.Recognizer()
            with sr.AudioFile("processed_audio.wav") as source:
                audio = recognizer.record(source)

            # Use multiple recognition engines
            recognizers = [
                (recognizer.recognize_google, "Google Speech Recognition"),
                (recognizer.recognize_sphinx, "CMU Sphinx"),
            ]
            for recognizer_func, name in recognizers:
                try:
                    command = recognizer_func(audio)
                    logging.info(f"Recognized command using {name}: {command}")
                    self.command_received.emit(command)
                    return
                except sr.UnknownValueError:
                    logging.warning(f"{name} could not understand audio")
                except sr.RequestError as e:
                    logging.error(f"Could not request results from {name}; {e}")
                except Exception as e:
                    logging.error(f"Error with {name}: {e}")
            self.status_update.emit("Could not understand audio")
            logging.error("All recognition engines failed")
        except Exception as e:
            error_message = f"Error in voice recognition: {str(e)}"
            self.status_update.emit(error_message)
            logging.error(error_message)

    def process_audio(self, audio, fs):
        """Return *audio* after noise gating, normalisation and band-passing.

        Args:
            audio: one-dimensional array of samples.
            fs: sample rate in Hz.
        """
        # Noise reduction
        noise_reduced = self.reduce_noise(audio)
        # Normalization
        normalized = self.normalize(noise_reduced)
        # Apply bandpass filter
        lowcut = 300
        highcut = 3000
        filtered = self.butter_bandpass_filter(normalized, lowcut, highcut, fs, order=6)
        return filtered

    def reduce_noise(self, audio):
        """Zero every sample quieter than twice the estimated noise level.

        This is a simple time-domain amplitude gate: the noise level is the
        mean absolute amplitude of the first 10% of the recording.
        """
        noise_sample = audio[:int(len(audio) * 0.1)]  # Use first 10% as noise sample
        if len(noise_sample) == 0:
            # Too short to estimate a noise floor; leave the audio untouched
            # instead of taking the mean of an empty slice.
            return audio
        noise_profile = np.mean(np.abs(noise_sample))
        return np.where(np.abs(audio) < noise_profile * 2, 0, audio)

    def normalize(self, audio):
        """Scale *audio* so that its peak absolute amplitude is 1.

        Empty or completely silent input is returned unchanged, because
        dividing by a zero peak would fill the signal with NaNs.
        """
        if len(audio) == 0:
            return audio
        peak = np.max(np.abs(audio))
        if peak == 0:
            return audio
        return audio / peak

    def butter_bandpass_filter(self, data, lowcut, highcut, fs, order=5):
        """Apply a Butterworth band-pass filter to *data*.

        Args:
            data: samples to filter.
            lowcut: lower cut-off frequency in Hz.
            highcut: upper cut-off frequency in Hz.
            fs: sample rate in Hz.
            order: Butterworth filter order.

        Returns:
            The filtered samples.
        """
        nyq = 0.5 * fs
        low = lowcut / nyq
        high = highcut / nyq
        # Second-order sections stay numerically stable for higher-order
        # band-pass designs, where the (b, a) polynomial form can lose
        # precision.
        sos = signal.butter(order, [low, high], btype='band', output='sos')
        return signal.sosfilt(sos, data)
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the assistant
    # window, and exit the process with the event loop's return code.
    app = QApplication(sys.argv)
    main_window = VoiceAssistant()
    main_window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)