Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix skipped audio chunk #318

Merged
merged 1 commit into from
Jan 13, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions whisper_live/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,7 @@ def __init__(self, websocket, task="transcribe", device=None, language=None, cli
self.vad_parameters = vad_parameters or {"onset": 0.5}
self.no_speech_thresh = 0.45
self.same_output_threshold = 10
self.end_time_for_same_output = None

device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
Expand Down Expand Up @@ -1095,10 +1096,16 @@ def update_segments(self, segments, duration):

if self.current_out.strip() == self.prev_out.strip() and self.current_out != '':
self.same_output_count += 1

# If we discard the audio only on the nth repetition of the same output, we might also discard
# audio that has not been transcribed yet, so capture the segment end time when the output first repeats.
if self.end_time_for_same_output is None:
self.end_time_for_same_output = segments[-1].end
time.sleep(0.1) # wait briefly for more voice activity in case the speaker paused unintentionally; this gives better punctuation.
else:
self.same_output_count = 0

self.end_time_for_same_output = None

# if same incomplete segment is seen multiple times then update the offset
# and append the segment to the list
if self.same_output_count > self.same_output_threshold:
Expand All @@ -1107,14 +1114,15 @@ def update_segments(self, segments, duration):
with self.lock:
self.transcript.append(self.format_segment(
self.timestamp_offset,
self.timestamp_offset + duration,
self.timestamp_offset + min(duration, self.end_time_for_same_output),
self.current_out,
completed=True
))
self.current_out = ''
offset = duration
offset = min(duration, self.end_time_for_same_output)
self.same_output_count = 0
last_segment = None
self.end_time_for_same_output = None
else:
self.prev_out = self.current_out

Expand Down
Loading