Skip to content

Commit 974b7de

Browse files
author
Simon Berger
committed
Update users/berger
1 parent 84686ed commit 974b7de

21 files changed

+585
-204
lines changed

users/berger/args/jobs/rasr_init_args.py

+25
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ def get_feature_extraction_args_16kHz(
9191
gt_args: Optional[Dict] = None,
9292
) -> Dict:
9393
mfcc_filter_width = features.filter_width_from_channels(channels=20, f_max=8000) # = 16000 / 2
94+
filterbank_filter_width = features.filter_width_from_channels(channels=80, f_max=8000) # = 16000 / 2
9495

9596
if mfcc_cepstrum_options is None:
9697
mfcc_cepstrum_options = {
@@ -142,6 +143,30 @@ def get_feature_extraction_args_16kHz(
142143
"normalization_options": {},
143144
}
144145
},
146+
"filterbank": {
147+
"filterbank_options": {
148+
"warping_function": "mel",
149+
"filter_width": filterbank_filter_width,
150+
"normalize": False,
151+
"normalization_options": {},
152+
"without_samples": False,
153+
"samples_options": {
154+
"audio_format": "wav",
155+
# "scale_input": 2**-15,
156+
"dc_detection": dc_detection,
157+
},
158+
"fft_options": {
159+
"preemphasis": 0.97,
160+
"window_type": "hanning",
161+
"window_shift": 0.01,
162+
"window_length": 0.025,
163+
},
164+
"apply_log": True,
165+
"add_epsilon": True,
166+
"add_features_output": True,
167+
# "warp_differential_unit": False,
168+
},
169+
},
145170
"energy": {
146171
"energy_options": {
147172
"without_samples": False,

users/berger/args/returnn/config.py

+1
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ def get_base_config(backend: Backend) -> Dict[str, Any]:
2121
elif backend == Backend.PYTORCH:
2222
result["backend"] = "torch"
2323
result["use_lovely_tensors"] = True
24+
# result["torch_amp"] = {"dtype": "bfloat16"}
2425
else:
2526
raise NotImplementedError
2627
return result

users/berger/configs/tedlium2/20230602_rescale_baselines/__init__.py

+2
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from .config_01_conformer_ctc import py as py_01
88

99
from .config_04a_conformer_transducer_bpe import py as py_04a
10+
from .config_04a_conformer_transducer_bpe_rasr import py as py_04a_rasr
1011
from .config_04b_conformer_transducer_phon import py as py_04b
1112

1213

@@ -88,6 +89,7 @@ def worker_wrapper(job, task_name, call):
8889
copy.deepcopy(py_01()),
8990
copy.deepcopy(py_01b()),
9091
copy.deepcopy(py_04a()),
92+
copy.deepcopy(py_04a_rasr()),
9193
copy.deepcopy(py_04b()),
9294
]:
9395
subreport.collapse([SummaryKey.CORPUS.value], best_selector_key=SummaryKey.ERR.value)

users/berger/configs/tedlium2/20230602_rescale_baselines/config_01_conformer_ctc.py

+1
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ def run_exp() -> SummaryReport:
114114
prior_scales=[0.5],
115115
lm_scales=[1.1],
116116
feature_type=FeatureType.GAMMATONE_16K,
117+
search_stats=True,
117118
)
118119

119120
# ********** System **********

users/berger/configs/tedlium2/20230602_rescale_baselines/config_01b_conformer_ctc_logmel.py

+1
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ def returnn_config_generator(
4242
extra_config = {
4343
"train": train_data_config,
4444
"dev": dev_data_config,
45+
"torch_amp": {"dtype": "bfloat16"},
4546
}
4647

4748
if variant == ConfigVariant.TRAIN:

users/berger/configs/tedlium2/20230602_rescale_baselines/config_04a_conformer_transducer_bpe.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ def returnn_config_generator(
5959
"train": train_data_config,
6060
"dev": dev_data_config,
6161
"max_seq_length": {"audio_features": 560000},
62+
"torch_amp": {"dtype": "bfloat16"},
6263
}
6364
serializer = model.get_train_serializer(model_config, **kwargs)
6465

@@ -159,7 +160,7 @@ def run_exp() -> SummaryReport:
159160
data.train_data_config,
160161
data.cv_data_config,
161162
data.forward_data_config,
162-
beam_sizes=[1, 2, 4],
163+
beam_sizes=[1, 2, 3],
163164
),
164165
)
165166

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,225 @@
1+
import copy
2+
import os
3+
from typing import List, Optional
4+
from i6_core.returnn.config import ReturnnConfig
5+
6+
from sisyphus import gs, tk
7+
8+
import i6_core.rasr as rasr
9+
from i6_experiments.users.berger.args.experiments import transducer as exp_args
10+
from i6_experiments.users.berger.args.returnn.config import get_returnn_config, Backend
11+
from i6_experiments.users.berger.args.returnn.learning_rates import LearningRateSchedules, Optimizers
12+
from i6_experiments.users.berger.corpus.tedlium2.bpe_transducer_data import get_tedlium2_data_dumped_bpe_labels
13+
from i6_experiments.users.berger.pytorch.models import conformer_transducer_v2 as model
14+
from i6_experiments.users.berger.recipe.summary.report import SummaryReport
15+
from i6_experiments.users.berger.systems.dataclasses import ConfigVariant, EncDecConfig, FeatureType, ReturnnConfigs
16+
from i6_experiments.users.berger.systems.returnn_seq2seq_system import ReturnnSeq2SeqSystem
17+
from i6_experiments.users.berger.util import default_tools_v2
18+
from i6_experiments.users.berger.systems.functors.recognition.returnn_search import LexiconType
19+
from i6_experiments.users.berger.systems.functors.rasr_base import RecognitionScoringType
20+
21+
# ********** Settings **********

# Number of output labels; passed on as ``num_outputs`` / ``num_classes`` by the functions below.
num_outputs = 1068
# Number of training sub-epochs; used for both the RETURNN config and the train step arguments.
num_subepochs = 500

# Set the class-level default flags used by RASR flow networks.
rasr.flow.FlowNetwork.default_flags = {"cache_mode": "task_dependent"}

# Deep-copy the shared default tool set before overriding the RASR binary path,
# so that the shared ``default_tools_v2`` object itself is not modified.
tools = copy.deepcopy(default_tools_v2)
tools.rasr_binary_path = tk.Path("/u/berger/repositories/rasr_versions/gen_seq2seq_dev/arch/linux-x86_64-standard")
30+
31+
32+
# ********** Return Config generators **********
33+
34+
35+
def returnn_config_generator(
    train_data_config: dict,
    dev_data_config: dict,
    **kwargs,
) -> ReturnnConfig:
    """Assemble the RETURNN training config for the transducer model.

    :param train_data_config: dataset config stored under the ``"train"`` key of the config
    :param dev_data_config: dataset config stored under the ``"dev"`` key of the config
    :param kwargs: forwarded unchanged to ``model.get_train_serializer``
    :return: the config returned by ``get_returnn_config`` for the PyTorch backend
    """
    model_config = model.get_default_config_v1(num_outputs=num_outputs)
    train_serializer = model.get_train_serializer(model_config, **kwargs)

    # Optimizer and learning-rate schedule settings, kept together for readability.
    learning_rate_settings = {
        "optimizer": Optimizers.AdamW,
        "schedule": LearningRateSchedules.OCLR,
        "initial_lr": 1e-06,
        "peak_lr": 8e-05,
        "decayed_lr": 1e-05,
        "final_lr": 1e-08,
    }

    return get_returnn_config(
        num_epochs=num_subepochs,
        num_inputs=1,
        num_outputs=num_outputs,
        target="classes",
        extra_python=[train_serializer],
        extern_data_config=True,
        backend=Backend.PYTORCH,
        grad_noise=0.0,
        grad_clip=0.0,
        # NOTE(review): presumably 10000 frames at 160 samples each -- confirm the intended unit.
        batch_size=10000 * 160,
        use_chunking=False,
        extra_config={
            "train": train_data_config,
            "dev": dev_data_config,
            "max_seq_length": {"audio_features": 560000},
            "torch_amp": {"dtype": "bfloat16"},
        },
        **learning_rate_settings,
    )
70+
71+
72+
def recog_returnn_configs_generator(
    **kwargs,
) -> EncDecConfig[ReturnnConfig]:
    """Assemble the encoder and decoder RETURNN configs used for recognition.

    :param kwargs: forwarded unchanged to both ``model.get_encoder_recog_serializer``
        and ``model.get_decoder_recog_serializer``
    :return: an ``EncDecConfig`` holding one PyTorch-backend config for the encoder
        and one for the decoder
    """
    # Dimensions shared between the encoder and decoder data declarations.
    feature_dim = 80
    encoding_dim = 384

    model_config = model.get_default_config_v1(num_outputs=num_outputs)
    enc_serializer = model.get_encoder_recog_serializer(model_config, **kwargs)
    dec_serializer = model.get_decoder_recog_serializer(model_config, **kwargs)

    # Encoder: consumes "sources" and produces "source_encodings".
    encoder_config = get_returnn_config(
        num_inputs=feature_dim,
        num_outputs=num_outputs,
        target=None,
        extra_python=[enc_serializer],
        extern_data_config=False,
        backend=Backend.PYTORCH,
        extra_config={
            "extern_data": {
                "sources": {"dim": feature_dim, "dtype": "float32"},
            },
            "model_outputs": {
                "source_encodings": {
                    "dim": encoding_dim,
                    "dtype": "float32",
                },
            },
        },
    )

    # Decoder: consumes "source_encodings" and "targets" (both declared without a
    # time axis) and produces "log_probs".
    decoder_config = get_returnn_config(
        num_inputs=1,
        num_outputs=num_outputs,
        target=None,
        # python_prolog=["from returnn.tensor.dim import Dim, batch_dim"],
        extra_python=[dec_serializer],
        extern_data_config=False,
        backend=Backend.PYTORCH,
        extra_config={
            "extern_data": {
                "source_encodings": {
                    "dim": encoding_dim,
                    "time_dim_axis": None,
                    "dtype": "float32",
                },
                "targets": {
                    "dim": num_outputs,
                    "time_dim_axis": None,
                    "sparse": True,
                    "shape": (1,),
                    "dtype": "int32",
                },
            },
            "model_outputs": {
                "log_probs": {
                    "dim": num_outputs,
                    "time_dim_axis": None,
                    "dtype": "float32",
                }
            },
        },
    )

    return EncDecConfig(encoder_config=encoder_config, decoder_config=decoder_config)
135+
136+
137+
def get_returnn_config_collection(
    train_data_config: dict,
    dev_data_config: dict,
    **kwargs,
) -> ReturnnConfigs[ReturnnConfig]:
    """Bundle the training config and the recognition configs of this experiment.

    :param train_data_config: training dataset config, passed to both generators
    :param dev_data_config: dev dataset config, passed to both generators
    :param kwargs: extra keyword arguments, passed to both generators
    :return: ``ReturnnConfigs`` with the train config and a single recognition
        entry stored under the key ``"recog"``
    """
    # Only the training config generator receives ``blank_id=0``.
    train_config = returnn_config_generator(
        train_data_config=train_data_config,
        dev_data_config=dev_data_config,
        blank_id=0,
        **kwargs,
    )
    recog_config = recog_returnn_configs_generator(
        train_data_config=train_data_config,
        dev_data_config=dev_data_config,
        **kwargs,
    )
    return ReturnnConfigs(
        train_config=train_config,
        recog_configs={"recog": recog_config},
    )
157+
158+
159+
def run_exp() -> SummaryReport:
    """Set up and run the transducer experiment and return its summary report.

    Fetches the data via ``get_tedlium2_data_dumped_bpe_labels``, builds the train
    and recognition step arguments, initializes a ``ReturnnSeq2SeqSystem`` with the
    corpora and scoring, registers the experiment configs, and then runs the
    training step followed by recognition on the dev corpora.

    :return: the summary report of the system
    """
    # The tool paths are checked up front because they are passed on as required arguments below.
    assert tools.returnn_root
    assert tools.returnn_python_exe
    assert tools.rasr_binary_path
    data = get_tedlium2_data_dumped_bpe_labels(
        num_classes=num_outputs,
        returnn_root=tools.returnn_root,
        returnn_python_exe=tools.returnn_python_exe,
        rasr_binary_path=tools.rasr_binary_path,
        augmented_lexicon=True,
        feature_type=FeatureType.SAMPLES,
    )

    # ********** Step args **********

    train_args = exp_args.get_transducer_train_step_args(num_epochs=num_subepochs, gpu_mem_rqmt=24)
    recog_args = exp_args.get_transducer_recog_step_args(
        num_classes=num_outputs,
        # Recognize on the final training epoch. Derived from ``num_subepochs`` so that
        # changing the training length cannot leave recognition pointing at an epoch
        # that training never produces.
        epochs=[num_subepochs],
        lm_scales=[0.5],
        label_scorer_type="onnx-ffnn-transducer",
        label_scorer_args={"extra_args": {"start_label_index": 0}},
        reduction_subtrahend=3,
        reduction_factor=4,
        # NOTE(review): the data above is requested with FeatureType.SAMPLES while
        # recognition uses LOGMEL_16K -- presumably intentional, confirm.
        feature_type=FeatureType.LOGMEL_16K,
    )

    # ********** System **********

    system = ReturnnSeq2SeqSystem(tools)

    system.init_corpora(
        dev_keys=data.dev_keys,
        test_keys=data.test_keys,
        corpus_data=data.data_inputs,
        am_args=exp_args.transducer_recog_am_args,
    )
    system.setup_scoring()

    # ********** Returnn Configs **********

    system.add_experiment_configs(
        "Conformer_Transducer",
        get_returnn_config_collection(
            data.train_data_config,
            data.cv_data_config,
        ),
    )

    system.run_train_step(**train_args)
    system.run_dev_recog_step(**recog_args)

    assert system.summary_report
    return system.summary_report
213+
214+
215+
def py() -> SummaryReport:
    """Entry point: run the experiment and register its summary report.

    Sets ``gs.ALIAS_AND_OUTPUT_SUBDIR`` to this file's name without the extension
    and with the first ``len("config_")`` characters removed, followed by ``/``.

    :return: a fresh ``SummaryReport`` into which the report of ``run_exp`` was merged
    """
    module_name, _extension = os.path.splitext(os.path.basename(__file__))
    filename_handle = module_name[len("config_") :]
    gs.ALIAS_AND_OUTPUT_SUBDIR = f"{filename_handle}/"

    summary_report = SummaryReport()
    summary_report.merge_report(run_exp(), update_structure=True)

    # NOTE(review): ALIAS_AND_OUTPUT_SUBDIR already ends in "/", so the registered
    # path contains "//" -- left unchanged to keep the existing report location.
    tk.register_report(f"{gs.ALIAS_AND_OUTPUT_SUBDIR}/summary.report", summary_report)

    return summary_report

0 commit comments

Comments
 (0)