Skip to content
This repository has been archived by the owner on May 12, 2024. It is now read-only.

Commit

Permalink
updating configuration for Whisper
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed Dec 15, 2023
1 parent 8b09e3b commit 9d87dbc
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
10 changes: 5 additions & 5 deletions baln/asrengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,17 @@ def __init__(self, model, base="openai/whisper-large-v2", language="english", ta
model=model,
tokenizer=WhisperTokenizer.from_pretrained(base),
chunk_length_s=30,
stride_length_s=5,
stride_length_s=3,
device=DEVICE,
return_timestamps="word",
)
processor = WhisperProcessor.from_pretrained(base)
self.__config = GenerationConfig.from_pretrained(base)
self.__config.no_repeat_ngram_size = 5
self.__config.no_repeat_ngram_size = 2

# force decoder IDs to create language
self.lang = language
self.__prompt_ids = processor.get_prompt_ids("um hello.")
self.__prompt_ids = processor.get_prompt_ids("um.")

# save the target sample rate
self.sample_rate = target_sample_rate
Expand Down Expand Up @@ -173,7 +173,7 @@ def __call__(self, data, segments):
words = self.pipe(data.cpu().numpy(),
batch_size=10,
generate_kwargs = {
"repetition_penalty": 1.01,
"repetition_penalty": 1.02,
"generation_config": self.__config,
"prompt_ids": self.__prompt_ids,
"task": "transcribe",
Expand All @@ -186,7 +186,7 @@ def __call__(self, data, segments):
#"temperature": 0.75,
# })
# filter out the leading prompt word(s) from the returned chunks
words = words["chunks"][2:]
words = words["chunks"][1:]

# filter out the elements belonging to the prompt, which have timestamp (0.0, 0.0)
# words = list(filter(lambda x:x["timestamp"] != (0.0, 0.0), words))
Expand Down
4 changes: 2 additions & 2 deletions baln/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from multiprocessing import Process, freeze_support

VERSION="0.3.61"
NOTES="dutch whisper"
VERSION="0.3.62"
NOTES="dutch whisper, bug fix"

#################### OPTIONS ################################

Expand Down
2 changes: 1 addition & 1 deletion meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "batchalign" %}
{% set version = "0.3.61" %}
{% set version = "0.3.62" %}

package:
name: {{ name }}
Expand Down

0 comments on commit 9d87dbc

Please sign in to comment.