Commit cffeb75 (verified) by QuanHoangNgoc · 1 parent: e30f601

Upload new exp dir of maintab_fast_conformer

Files changed (36)
  1. .gitattributes +4 -0
  2. maintab_fast_conformer_10-09_01-54/25808631.csv +0 -0
  3. maintab_fast_conformer_10-09_01-54/40250165.csv +0 -0
  4. maintab_fast_conformer_10-09_01-54/60227680.csv +0 -0
  5. maintab_fast_conformer_10-09_01-54/_dev_mf.json +0 -0
  6. maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo +3 -0
  7. maintab_fast_conformer_10-09_01-54/_nemo_model_part_1.nemo +3 -0
  8. maintab_fast_conformer_10-09_01-54/_train_mf.json +3 -0
  9. maintab_fast_conformer_10-09_01-54/args_trainer.txt +1 -0
  10. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=0-valwerval_wer=0.9991.ckpt +3 -0
  11. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=1-valwerval_wer=0.9954.ckpt +3 -0
  12. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=108-valwerval_wer=0.4084.ckpt +3 -0
  13. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=109-valwerval_wer=0.4073.ckpt +3 -0
  14. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=110-valwerval_wer=0.4045.ckpt +3 -0
  15. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=111-valwerval_wer=0.4008.ckpt +3 -0
  16. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=112-valwerval_wer=0.3828.ckpt +3 -0
  17. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=2-valwerval_wer=0.9649.ckpt +3 -0
  18. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=28-valwerval_wer=1.1402.ckpt +3 -0
  19. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=31-valwerval_wer=1.1436.ckpt +3 -0
  20. maintab_fast_conformer_10-09_01-54/code-folder/Demo/demo.yaml +269 -0
  21. maintab_fast_conformer_10-09_01-54/code-folder/Demo/train.py +270 -0
  22. maintab_fast_conformer_10-09_01-54/code-folder/Demo/utils.py +269 -0
  23. maintab_fast_conformer_10-09_01-54/code-folder/__pycache__/train.cpython-311.pyc +0 -0
  24. maintab_fast_conformer_10-09_01-54/code-folder/configs/fast_conformer.yaml +271 -0
  25. maintab_fast_conformer_10-09_01-54/code-folder/train.py +315 -0
  26. maintab_fast_conformer_10-09_01-54/code-folder/utils/__pycache__/utils.cpython-311.pyc +0 -0
  27. maintab_fast_conformer_10-09_01-54/code-folder/utils/install_cmd.txt +22 -0
  28. maintab_fast_conformer_10-09_01-54/code-folder/utils/utils.py +283 -0
  29. maintab_fast_conformer_10-09_01-54/conf_model.txt +1 -0
  30. maintab_fast_conformer_10-09_01-54/git_pip_env.txt +895 -0
  31. maintab_fast_conformer_10-09_01-54/model_avg.ckpt +3 -0
  32. maintab_fast_conformer_10-09_01-54/model_mp.txt +15 -0
  33. maintab_fast_conformer_10-09_01-54/nemo_model_avg.nemo +3 -0
  34. maintab_fast_conformer_10-09_01-54/training_process_100.png +0 -0
  35. maintab_fast_conformer_10-09_01-54/training_process_112.png +0 -0
  36. maintab_fast_conformer_10-09_01-54/training_process_42.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/_nemo_model_part_1.nemo filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/_train_mf.json filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/nemo_model_avg.nemo filter=lfs diff=lfs merge=lfs -text
maintab_fast_conformer_10-09_01-54/25808631.csv ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/40250165.csv ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/60227680.csv ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/_dev_mf.json ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b647ea527cd25659039983a67633bf136e64405dd93c52fbbfd30511e79d7b6
+ size 111626240
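
The `.nemo`, `.json`, and `.ckpt` entries in this commit are Git LFS pointer files rather than the binaries themselves: each records the spec version, the SHA-256 object id, and the byte size of the real file. A minimal sketch of reading such a pointer from a checkout where it has not been smudged — the parse_lfs_pointer helper and the example usage are hypothetical, not part of this repo:

def parse_lfs_pointer(path: str) -> dict:
    # Each line of an LFS pointer is "<key> <value>",
    # e.g. "oid sha256:0b64...", "size 111626240".
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# Hypothetical usage:
# info = parse_lfs_pointer("maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo")
# print(info["oid"], int(info["size"]))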
maintab_fast_conformer_10-09_01-54/_nemo_model_part_1.nemo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7cef41435f6994b0ff1ac51f506d585959c92f5340d2708da2217e37a5c624be
+ size 111626240
maintab_fast_conformer_10-09_01-54/_train_mf.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:198327931cefe7cada1e1ae27d9b594456407377d82c75f9f65e8d7f4d3d90b4
+ size 15057811
maintab_fast_conformer_10-09_01-54/args_trainer.txt ADDED
@@ -0,0 +1 @@
+ {'precision': 'bf16', 'devices': 1, 'num_nodes': 1, 'accelerator': 'gpu', 'strategy': 'auto', 'max_epochs': 1000, 'accumulate_grad_batches': 1, 'gradient_clip_val': 0.0, 'log_every_n_steps': 100, 'val_check_interval': 1.0, 'enable_progress_bar': False, 'num_sanity_val_steps': 0, 'check_val_every_n_epoch': 1, 'sync_batchnorm': True, 'benchmark': False, 'enable_checkpointing': True, 'max_time': '00:09:05:00', 'callbacks': [<V2_Run.Fast_conformer_nemo.utils.utils.LossLogger object at 0x7e2c6bc4fb90>, <lightning.pytorch.callbacks.early_stopping.EarlyStopping object at 0x7e2c6b6b3110>, <lightning.pytorch.callbacks.model_checkpoint.ModelCheckpoint object at 0x7e2c6b87f350>]}
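
args_trainer.txt stores the repr of the keyword arguments that utils.py (below) passes to the Lightning Trainer; the callback entries print as Python object reprs and are not reconstructible from this file. A minimal sketch of how such a dict is consumed, with the callbacks list omitted:

import lightning.pytorch as pl

# Minimal sketch, reusing the values recorded in args_trainer.txt;
# the callbacks entry (LossLogger, EarlyStopping, ModelCheckpoint) is omitted.
trainer = pl.Trainer(
    precision="bf16", devices=1, num_nodes=1, accelerator="gpu",
    strategy="auto", max_epochs=1000, accumulate_grad_batches=1,
    gradient_clip_val=0.0, log_every_n_steps=100, val_check_interval=1.0,
    enable_progress_bar=False, num_sanity_val_steps=0,
    check_val_every_n_epoch=1, sync_batchnorm=True, benchmark=False,
    enable_checkpointing=True, max_time="00:09:05:00",
)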
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=0-valwerval_wer=0.9991.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d347b08973443ac69db26151ac2ad15b7e65019240275abe3b5da1fc96406fde
+ size 334971421
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=1-valwerval_wer=0.9954.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:714aaba5e6afc63a396eccdf6df55f8c52ef736f312208574dd63dea9ea06a64
+ size 334971804
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=108-valwerval_wer=0.4084.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3253efb67dcac3d737b62ec40b370cc865452ce76536f27cea1139b551491e2b
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=109-valwerval_wer=0.4073.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2246d82d7db8b7d2541a5567fbf5c15538b3a1f16cf2c172efbaff438aacd812
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=110-valwerval_wer=0.4045.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0092203a63c727db37f170369c048aa4a3e1d5e81dd812b82d9f9573828c93f4
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=111-valwerval_wer=0.4008.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a79902648a37db478d1273b56974b77862457cfc026963060a57de3c8b30309c
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=112-valwerval_wer=0.3828.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9da164a893141bd05dac3280e072e62035e1335ef11e94f4eb69132fe3d88a53
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=2-valwerval_wer=0.9649.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54e1bd3fe262d3d7ae16ff8090b45132c3be639f957a4e0f7b5ad0cced8f93b6
+ size 334972187
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=28-valwerval_wer=1.1402.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f389a64fdaebd40f41653e35012352d3f22f07da22fcc58b0b740d446c6b5a43
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=31-valwerval_wer=1.1436.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23d12861b5ce277e339631d5ad385d8e0988b8d241c66f272057d005c0aece2a
+ size 334972634
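
The doubled tokens in these checkpoint names (epochepoch=..., valwerval_wer=...) come from the ModelCheckpoint filename template in utils.py below: with the default auto_insert_metric_name=True, Lightning expands each {metric} placeholder to metric=value, so the literal prefix in the template and the inserted key concatenate. A minimal sketch of the callback that produces them:

from lightning.pytorch.callbacks import ModelCheckpoint

# With auto_insert_metric_name=True (the default), "{epoch}" expands to
# "epoch=<n>", so this template yields names like
# "epochepoch=112-valwerval_wer=0.3828.ckpt" as listed above.
ckpt_cb = ModelCheckpoint(
    filename="epoch{epoch}-valwer{val_wer:.4f}",
    monitor="val_wer", mode="min", save_top_k=5,
)
# Passing auto_insert_metric_name=False would instead give
# "epoch112-valwer0.3828.ckpt".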
maintab_fast_conformer_10-09_01-54/code-folder/Demo/demo.yaml ADDED
@@ -0,0 +1,269 @@
+ # It contains the default values for training a Fast Conformer-CTC ASR model, large size (~120M), with CTC loss and sub-word encoding.
+
+ # You may find more info about FastConformer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#fast-conformer
+
+ # We suggest using trainer.precision=bf16 for GPUs which support it; otherwise trainer.precision=16 is recommended.
+ # Using bf16 or 16 makes it possible to double the batch size and speed up training/inference. If fp16 is not stable and the model diverges after some epochs, you may use fp32.
+ # Here are the suggested batch sizes per GPU for each precision and memory size:
+ #! fp16 >> 32 --> batch_size = 16
+ # +-----------+------------+------------+
+ # | Precision | GPU Memory | Batch Size |
+ # +===========+============+============+
+ # | 32        | 16GB       | 16         |
+ # |           | 32GB       | 32         |
+ # |           | 80GB       | 64         |
+ # +-----------+------------+------------+
+ # | fp16 or   | 16GB       | 32         |
+ # | bf16      | 32GB       | 64         |
+ # |           | 80GB       | 128        |
+ # +-----------+------------+------------+
+ # Here are the recommended configs for different variants of FastConformer-CTC-BPE; other parameters are the same as in this config file.
+ #
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Model          | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling |
+ # +================+=========+=========+==========+==================+==============+==========================+=================+==========+
+ # | Small (14M)    | 176     | 4       | 16       | 9                | 0.0          | 320                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Medium (32M)   | 256     | 4       | 16       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Large (120M)   | 512     | 8       | 17       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XLarge (616M)  | 1024    | 8       | 24       | 9                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XXLarge (1.2B) | 1024    | 8       | 42       | 5                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+
+ # Note: these are based on the assumption of a max_duration of 20. If you have a longer or shorter max_duration, then batch sizes may need to be updated accordingly.
+
+ # Default learning parameters in this config are set for a global batch size of 2K, while you may use lower values.
+ # To increase the global batch size with a limited number of GPUs, you may use a higher accumulate_grad_batches.
+ # However, accumulate_grad_batches is better avoided as long as the global batch size is large enough and training is stable.
+
+ name: "FastConformer-CTC-BPE"
+ train_path: "output/annot/train_ds.json"
+ dev_path: "output/annot/dev_300.json"
+ test_path: "output/annot/test_ds.json"
+ vocab_dir: "output/annot/" #* correct
+ full_val_path: "output/annot/dev_ds.json"
+
+ train_ds_batch_size: 16
+ gen_ds_batch_size: 16
+ ds_max_duration: 32.0 #! Will be replaced
+ aug_time_masks: 2 # Changed from 10 for faster, smoother optimization
+
+ # Model parameters
+ d_model: 256
+ n_heads: 4
+ n_layers: 16
+ conv_kernel_size: 9
+ xscaling: true
+ # Optimize
+ lr: 1e-3 #! Select lr and decay = 0.0, sched, warm_step
+ weight_decay: 1e-3
+ warmup_steps: 15000
+
+ # Training parameters # Don't use
+ num_epochs: 1000
+ precision: 32
+ accumulate_grad_batches: 1
+
+ model:
+   sample_rate: 16000
+   log_prediction: false #! (changed from true to false) enables logging sample predictions in the output during training
+   ctc_reduction: "mean_volume"
+   skip_nan_grad: false
+
+   train_ds:
+     manifest_filepath: ${train_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${train_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: true
+     num_workers: 8
+     pin_memory: true
+     max_duration: ${ds_max_duration} # it is set for LibriSpeech, you may need to update it for your dataset
+     min_duration: 0.1
+     # tarred datasets
+     is_tarred: false
+     tarred_audio_filepaths: null
+     shuffle_n: 2048
+     # bucketing params
+     bucketing_strategy: "fully_randomized"
+     bucketing_batch_size: null
+
+   validation_ds:
+     manifest_filepath: ${dev_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   test_ds:
+     manifest_filepath: ${test_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   # recommend a vocab size of 128 or 256 when training on ~1k hr datasets and a 1k vocab size on 10+k hr datasets
+   # you may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py
+   tokenizer:
+     dir: ${vocab_dir} # path to directory which contains either tokenizer.model (bpe) or vocab.txt (wpe)
+     type: wpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)
+
+   preprocessor:
+     _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
+     sample_rate: ${model.sample_rate}
+     normalize: "per_feature"
+     window_size: 0.025
+     window_stride: 0.01
+     window: "hann"
+     features: 80
+     n_fft: 512
+     log: true
+     frame_splicing: 1
+     dither: 0.00001
+     pad_to: 0
+     pad_value: 0.0
+
+   spec_augment:
+     _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+     freq_masks: 2 # set to zero to disable it
+     # you may use lower time_masks for smaller models to have faster convergence
+     time_masks: ${aug_time_masks} # set to zero to disable it, from 10
+     freq_width: 27
+     time_width: 0.05
+
+   encoder:
+     _target_: nemo.collections.asr.modules.ConformerEncoder
+     feat_in: ${model.preprocessor.features}
+     feat_out: -1 # you may set it if you need a different output size than the default d_model
+     n_layers: ${n_layers}
+     d_model: ${d_model}
+
+     # Sub-sampling params
+     subsampling: dw_striding # vggnet, striding, stacking or stacking_norm, dw_striding
+     subsampling_factor: 8 # must be a power of 2 for striding and vggnet
+     subsampling_conv_channels: 256 # -1 sets it to d_model = 256
+     causal_downsampling: false
+
+     # Feed forward module's params
+     ff_expansion_factor: 4
+
+     # Multi-headed Attention Module's params
+     self_attention_model: rel_pos # rel_pos or abs_pos
+     n_heads: ${n_heads} # may need to be lower for smaller d_models
+     # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention
+     att_context_size: [-1, -1] # -1 means unlimited context
+     att_context_style: regular # regular or chunked_limited
+     xscaling: ${xscaling} # scales up the input embeddings by sqrt(d_model)
+     untie_biases: true # unties the biases of the TransformerXL layers
+     pos_emb_max_len: 5000
+     use_pytorch_sdpa: false #! use torch sdpa instead of manual attention
+     use_pytorch_sdpa_backends: [] # empty list means all backends https://pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html e.g. [MATH]
+
+     # Convolution module's params
+     conv_kernel_size: ${conv_kernel_size}
+     conv_norm_type: "batch_norm" # batch_norm or layer_norm or groupnormN (N specifies the number of groups)
+     # conv_context_size can be "causal" or a list of two integers such that conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size
+     # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0]
+     conv_context_size: null
+
+     ### regularization
+     dropout: 0.1 # The dropout used in most of the Conformer Modules
+     dropout_pre_encoder: 0.1 # The dropout used before the encoder
+     dropout_emb: 0.0 # The dropout used for embeddings
+     dropout_att: 0.1 # The dropout for multi-headed attention modules
+
+     # set to non-zero to enable stochastic depth
+     stochastic_depth_drop_prob: 0.0
+     stochastic_depth_mode: linear # linear or uniform
+     stochastic_depth_start_layer: 1
+
+   decoder:
+     _target_: nemo.collections.asr.modules.ConvASRDecoder
+     feat_in: null
+     num_classes: -1
+     vocabulary: []
+
+   # config for InterCTC loss: https://arxiv.org/abs/2102.03216
+   # specify loss weights and which layers to use for InterCTC
+   # e.g., to reproduce the paper results, set loss_weights: [0.3]
+   # and apply_at_layers: [8] (assuming 18 layers). Note that the final
+   # layer loss coefficient is automatically adjusted (to 0.7 in the above example)
+   interctc:
+     loss_weights: []
+     apply_at_layers: []
+
+   optim:
+     name: adamw
+     # lr: 1e-3 #! Select lr and decay, sched, warm_step
+     lr: ${lr}
+     # optimizer arguments
+     betas: [0.9, 0.98]
+     # less need for weight_decay as we already have large augmentations with SpecAug
+     # you may need weight_decay for large models, stable AMP training, small datasets, or when lower augmentations are used
+     # weight decay of 0.0 with lr of 2.0 also works fine
+     weight_decay: ${weight_decay}
+
+     # scheduler setup
+     sched:
+       name: CosineAnnealing
+       # scheduler config override
+       # warmup_steps: 15000
+       warmup_steps: ${warmup_steps}
+       warmup_ratio: null
+       min_lr: 1e-4
+
+ # ==============================================================================
+ # Don't use
+
+ trainer:
+   devices: -1 # number of GPUs, -1 would use all available GPUs
+   num_nodes: 1
+   max_epochs: ${num_epochs}
+   max_steps: -1 # computed at runtime if not set
+   val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for a number of iterations
+   accelerator: auto
+   strategy:
+     _target_: lightning.pytorch.strategies.DDPStrategy
+     gradient_as_bucket_view: true
+   accumulate_grad_batches: ${accumulate_grad_batches}
+   gradient_clip_val: 0.0
+   precision: ${precision} # 16, 32, or bf16
+   log_every_n_steps: 10 # Interval of logging.
+   enable_progress_bar: True
+   num_sanity_val_steps: 0 # number of validation steps to run as a sanity check before training; setting it to 0 disables it
+   check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs
+   sync_batchnorm: true
+   enable_checkpointing: False # Provided by exp_manager
+   logger: false # Provided by exp_manager
+   benchmark: false # needs to be false for models with variable-length speech input as it slows down training
+
+ # ==============================================================================
+
+ exp_manager:
+   exp_dir: null
+   name: ${name}
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   checkpoint_callback_params:
+     # in case of multiple validation sets, the first one is used
+     monitor: "val_wer"
+     mode: "min"
+     save_top_k: 5
+     always_save_nemo: True # saves the checkpoints as .nemo files instead of PTL checkpoints
+
+   resume_from_checkpoint: null # The path to a checkpoint file to continue the training; restores the whole state including the epoch, step, LR schedulers, apex, etc.
+   # you need to set these two to True to continue the training
+   resume_if_exists: false
+   resume_ignore_no_checkpoint: false
+
+   # You may use this section to create a W&B logger
+   create_wandb_logger: false
+   wandb_logger_kwargs:
+     name: null
+     project: null
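
A minimal sketch of how this config is consumed, mirroring get_config_nemo and init_nemo_model in the train.py below (assumes NeMo is installed and that the manifest and tokenizer paths in the YAML exist on disk):

import nemo.collections.asr as nemo_asr
from omegaconf import OmegaConf

# Load the YAML, resolve ${...} interpolations, and build the CTC-BPE model
# from its `model` section, as train.py does.
cfg = OmegaConf.load("maintab_fast_conformer_10-09_01-54/code-folder/Demo/demo.yaml")
params = OmegaConf.to_container(cfg, resolve=True)["model"]
conf_model = OmegaConf.create(params)
nemo_model = nemo_asr.models.EncDecCTCModelBPE(cfg=conf_model)  # a trainer can be attached later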
maintab_fast_conformer_10-09_01-54/code-folder/Demo/train.py ADDED
@@ -0,0 +1,270 @@
+ import os
+ import subprocess
+ import sys
+
+ from sklearn.model_selection import train_test_split
+
+ # Have first: V0 -> this
+ if True:
+     sys.path.append(os.getcwd())
+ if True:
+     from V0_Import.import_src import *
+ if True:
+     from Fast_conformer_nemo.utils.utils import *
+     run_import_src = True
+
+
+ # ==============================================================================
+ # Util functions
+
+
+ def set_all_seeds(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+
+ def get_config_nemo():
+     # config_path = "Fast_conformer_nemo/configs/fast_conformer.yaml"
+     # if len(sys.argv) >= 2:
+     #     config_path = sys.argv[1]
+     print(config_path, flush=True)
+     with open(config_path, "r", encoding="utf-8") as f:
+         yaml_text = f.read().strip()
+
+     _config = OmegaConf.create(yaml_text)
+     config_dict = OmegaConf.to_container(_config, resolve=True)
+     print(f"\n{config_dict['model']}\n{config_dict}", flush=True)
+     return config_dict
+
+
+ def get_train_dev_test(path):  # Train is full, dev is 300, test is full
+     data = []
+     print(path, flush=True)
+     with open(path, "r", encoding="utf-8") as f:
+         for line in f:
+             if line.strip():
+                 dic = json.loads(line)
+                 data.append(dic)
+     return data
+
+
+ def get_info(train_path):
+     data = []
+     max_dur = 0.0
+     words = []
+     with open(train_path, "r", encoding="utf-8") as f:  # Same code as above
+         for line in f:
+             if line.strip():
+                 dic = json.loads(line)
+                 data.append(dic)
+                 max_dur = max(max_dur, dic["duration"])
+                 words.extend(str(dic["text"]).split())
+
+     words = list(set(sorted(words)))
+     return data, max_dur, len(words)
+
+
+ # ==============================================================================
+ # Set global vars and data
+ if run_import_src:
+     set_all_seeds(42)  # Set random seed for reproducibility
+     config_dict = get_config_nemo()  # Get from path
+     data, max_dur, len_vocab = get_info(config_dict['train_path'])
+     sample_rate = 16000
+     dev_data, test_data = get_train_dev_test(
+         config_dict['dev_path']), get_train_dev_test(config_dict['test_path'])
+     full_val_data = get_train_dev_test(config_dict['full_val_path'])
+
+     log.info("Overall checking:")
+     log.info(f"Number of samples in manifest: {len(data)}")
+     log.info(
+         f"- Max duration in manifest: {max_dur:.2f} seconds, sample rate: {sample_rate}")
+     log.info(f"- Vocab in manifest: {len_vocab} units")
+     log.info(f"Number of dev is {len(dev_data)}, test is {len(test_data)}")
+
+
+ # ==============================================================================
+ # Create params, conf_model, mfpath and change settings
+ if run_import_src:
+     params = copy.deepcopy(config_dict)['model']
+     train_mfpath = os.path.join(res_exp_dir, "_train_mf.json")
+     dev_mfpath = os.path.join(res_exp_dir, "_dev_mf.json")
+
+     for ds in ['train_ds', 'validation_ds']:  # Change train and dev temporarily
+         if ds == 'train_ds':
+             params[ds]['manifest_filepath'] = train_mfpath
+             params[ds]['max_duration'] = round(max_dur + 0.1, 2)
+         else:
+             params[ds]['manifest_filepath'] = dev_mfpath
+     log.info(
+         f"Changed params['train_ds']['max_duration'] = {round(max_dur + 0.1, 2)} seconds \nand {train_mfpath}, {dev_mfpath} to write data!")
+
+     # Create an OmegaConf object from the dictionary
+     conf_model = OmegaConf.create(params)
+     write_txt_exp_dir("conf_model.txt", conf_model)
+     # Run pip freeze and capture the output as a string
+     pip_freeze_str = subprocess.check_output(["pip", "freeze"], text=True)
+     write_txt_exp_dir("pip_env.txt", pip_freeze_str)
+
+
+ # ==============================================================================
+ # Create a subset from a part and write it to mfpath
+
+
+ def create_train_dev(root_train_data: list, begin_idx, end_idx, num_train, num_dev, root_dev_data=None):
+     def get_min_max_duration(data):
+         mi, ma = 100.0, 0.0
+         for dic in data:
+             mi = min(mi, dic["duration"])
+             ma = max(ma, dic["duration"])
+         return mi, ma
+
+     def split_data(data: list, k):
+         if k >= len(data):
+             return data
+         _train_data, _test_data = train_test_split(
+             data, test_size=k, random_state=42
+         )
+         return _test_data
+
+     # # Sort by distribution
+     # data = list(sorted(root_train_data, key=lambda x: int(
+     #     os.path.basename(x["audio_filepath"]).split("_")[0])))
+
+     # Select the part
+     data = root_train_data.copy()
+     begin_idx = max(0, begin_idx)
+     end_idx = min(end_idx, len(data))
+     data = data[begin_idx:end_idx]
+     log.info(f"- Duration of this part: [{get_min_max_duration(data)}]")
+
+     # Select a random subset from the part: train from data, and dev from train or from root_dev
+     num_train = min(num_train, len(data))
+     train_data = split_data(data, num_train)
+     if root_dev_data is None:
+         num_dev = min(num_dev, num_train)
+         dev_data = split_data(train_data, num_dev)
+     else:
+         dev_data = list(root_dev_data).copy()
+
+     # Write the subset data back to a new file (or overwrite)
+     dev_in_train = root_dev_data is None
+     log.info(
+         f"- Number of train is {len(train_data)}, dev is {len(dev_data)}, dev in train: {dev_in_train}")
+     log.info(f"\n{train_data[0]}\n{dev_data[0]}\n")
+
+     with open(train_mfpath, "w", encoding="utf-8") as fout:
+         for item in train_data:
+             fout.write(json.dumps(item, ensure_ascii=False) + "\n")
+
+     with open(dev_mfpath, "w", encoding="utf-8") as fout:
+         for item in dev_data:
+             fout.write(json.dumps(item, ensure_ascii=False) + "\n")
+
+
+ # ==============================================================================
+ # Demo NeMo model, and demo first
+ def init_nemo_model(data, dev_data, conf_model):
+     log.info(f"\n\nInit nemo model:")
+     create_train_dev(root_train_data=data, begin_idx=0, end_idx=len(data), num_train=len(
+         data), num_dev=-1, root_dev_data=dev_data)  # Demo create data, v
+     trainer = create_new_trainer(epochs=1000, min_stop=0.0)  # Demo trainer, v
+
+     # trainer.fit(nemo_model)
+     nemo_model = nemo_asr.models.EncDecCTCModelBPE(
+         cfg=conf_model, trainer=trainer)
+     summary = ModelSummary(nemo_model)
+     print(summary)
+     return nemo_model
+
+
+ # ==============================================================================
+ # Train model: multi-part training
+
+
+ def train_multi_turn(train_data, dev_data):
+     # Init the nemo model
+     set_all_seeds(42)
+     global params, conf_model, res_exp_dir
+     nemo_model = init_nemo_model(
+         data=train_data, dev_data=dev_data, conf_model=conf_model)
+
+     # ! NOTE: Settings for multi-part training
+     train_data = list(sorted(train_data, key=lambda x: x["duration"]))
+
+     cnt = len(train_data)
+     setting = {
+         "begin_idx": [0, 0],
+         "end_idx": [5000, cnt],
+         "num_train": [1000, cnt],
+         "num_dev": [100, -1],
+         "epochs": [100, 1000],  # ! Not run for enough epochs
+         "min_stop": [0.4, 0.0]
+     }
+     num_part = len(setting["begin_idx"])
+     trainer = None
+
+     for i in range(num_part):
+         begin_idx = setting["begin_idx"][i]
+         end_idx = setting["end_idx"][i]
+         num_train = setting["num_train"][i]
+         num_dev = setting["num_dev"][i]
+         num_epochs = setting["epochs"][i]
+         min_stop = setting["min_stop"][i]
+         log.info(
+             f"\n\n Here {i}: {begin_idx} --> {end_idx} | {num_train}, {num_dev}, {cnt} | {num_epochs}, {min_stop}")
+
+         # Create the train/dev files
+         if num_dev <= 0:
+             create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
+                              num_train=num_train, num_dev=num_dev, root_dev_data=dev_data)
+         else:
+             create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
+                              num_train=num_train, num_dev=num_dev)
+
+         # Create a fresh trainer
+         trainer = create_new_trainer(
+             epochs=num_epochs, min_stop=min_stop)
+
+         nemo_model.setup_training_data(
+             train_data_config=params['train_ds'])  # Reload it
+         nemo_model.setup_validation_data(
+             val_data_config=params['validation_ds'])  # Reload it
+         trainer.fit(nemo_model)  # Fit
+
+         # Save it temporarily
+         save_path = os.path.join(res_exp_dir, f"_nemo_model_part_{i}.nemo")
+         nemo_model.save_to(save_path)
+     return trainer, nemo_model
+
+
+ # =======================
+ def run_main_in_notebook():
+     global data, dev_data, res_exp_dir
+     trainer, nemo_model = train_multi_turn(data, dev_data)  # Train with data
+
+     # ==========================================================================
+     # Get paths from the checkpoint callback
+     # (last in the list of callbacks since it was added last)
+     ckpt_callback = trainer.callbacks[-1]
+     best_paths = list(ckpt_callback.best_k_models.keys())
+
+     # Reload and save
+     nemo_model, avg_weights = reload_nemo_from_avg(
+         best_paths=best_paths, nemo_model=nemo_model)
+     avg_ckpt_path = f"{res_exp_dir}/model_avg.ckpt"
+     nemo_model_path = f"{res_exp_dir}/nemo_model_avg.nemo"
+     save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path)
+
+     # Generate, score, and save for each mfpath
+     nemo_inference_for_mfpath(nemo_model, config_dict['dev_path'])
+     nemo_inference_for_mfpath(nemo_model, config_dict['test_path'])
+     nemo_inference_for_mfpath(nemo_model, config_dict['full_val_path'])
+     push_exp_dir_to_hub(res_exp_dir)
+
+
+ if __name__ == "__main__":
+     run_main_in_notebook()
maintab_fast_conformer_10-09_01-54/code-folder/Demo/utils.py ADDED
@@ -0,0 +1,269 @@
+ import copy
+ import csv
+ import glob
+ import json
+ import logging
+ import logging as log
+ import os
+ import random
+ import re
+ import shutil
+ import string
+ import sys
+ import unicodedata
+
+ import jiwer
+ import lightning.pytorch as pl
+ import nemo
+ import nemo.collections.asr as nemo_asr
+ import numpy as np
+ import torch
+ from datasets import load_dataset
+ from jiwer import wer
+ from lightning.pytorch.callbacks import Callback, EarlyStopping, ModelCheckpoint
+ from lightning.pytorch.utilities.model_summary import ModelSummary
+ from omegaconf import OmegaConf
+ from scipy.io import wavfile
+
+ from V0_Import.import_src import push_file_to_hub
+
+
+ class LossLogger(Callback):
+     def __init__(self, exp_dir):
+         super().__init__()
+         self.train_losses = []
+         self.val_losses = []
+         self.train_wer = []
+         self.val_wer = []
+         self.num_last = 100  # ? epoch unit
+         self.num_plot = 100  # ? epoch
+         self.allow_show_plot = False  # ? Allow showing the plot in a notebook
+         self.exp_dir = exp_dir
+
+     def on_train_epoch_end(self, trainer, pl_module):
+         train_loss = trainer.callback_metrics.get('train_loss')
+         epoch_idx = trainer.current_epoch
+         lr = trainer.optimizers[0].param_groups[0]['lr']  # Print lr
+         log.info(f"Epoch {epoch_idx} ended." + "=" * 100)
+         if train_loss is not None:
+             self.train_losses.append(train_loss.item())
+             log.info(f"Train Loss: {train_loss.item()}, lr: {lr}")
+
+         if epoch_idx != 0 and epoch_idx % self.num_plot == 0:
+             self._plot_train()
+
+     def on_validation_epoch_end(self, trainer, pl_module):
+         val_loss = trainer.callback_metrics.get('val_loss')
+         val_wer = trainer.callback_metrics.get('val_wer')
+         if val_loss is not None:
+             self.val_losses.append(val_loss.item())
+             log.info(f"Validation Loss: {val_loss.item()}")
+         if val_wer is not None:
+             self.val_wer.append(val_wer.item())
+             log.info(f"Validation WER: {val_wer.item()}")
+
+     def _plot_train(self):
+         import matplotlib.pyplot as plt
+         plt.figure(figsize=(10, 6))
+         plt.subplot(2, 1, 1)
+         num = self.num_last
+         plt.plot(self.train_losses[-num:], label='Training Loss')
+         plt.plot(self.val_losses[-num:], label='Validation Loss')
+         plt.xlabel('Epoch')
+         plt.ylabel('Loss')
+         plt.legend()
+         plt.title('Training and Validation Loss')
+
+         plt.subplot(2, 1, 2)
+         plt.plot(self.train_wer[-num:], label='Training WER')
+         plt.plot(self.val_wer[-num:], label='Validation WER')
+         plt.xlabel('Epoch')
+         plt.ylabel('WER')
+         plt.legend()
+         plt.title('Training and Validation WER')
+         plt.tight_layout()
+         # allow_show_plot = True  # Allow showing the plot in a notebook
+         if self.allow_show_plot:
+             plt.show()
+         else:
+             plot_png = os.path.join(
+                 self.exp_dir, f"training_process_{len(self.val_wer)}.png")
+             plt.savefig(plot_png)
+             push_file_to_hub(plot_png)
+
+     def on_train_end(self, trainer, pl_module):
+         self.num_last = len(self.val_wer)
+         self._plot_train()
+
+
+ config_path = "Fast_conformer_nemo/configs/fast_conformer.yaml"  # ? NOTE: Setting
+ res_exp_dir = "results_fast_conformer"  # ? NOTE: Setting
+ os.makedirs(res_exp_dir, exist_ok=True)
+ src_folder = "Fast_conformer_nemo"
+ dst_folder = os.path.join(res_exp_dir, "code-folder")
+ shutil.copytree(src_folder, dst_folder, dirs_exist_ok=True)
+ log.info(f"Copied code to {dst_folder}")
+
+
+ def write_txt_exp_dir(name, var):
+     path = os.path.join(res_exp_dir, name)
+     with open(path, "w", encoding="utf-8") as f:
+         f.write(str(var))
+
+
+ # ==============================================================================
+
+
+ def create_time_callbacks(num_keep, min_stop, max_hour):
+     # num_keep = 500
+     early_stop_callback = EarlyStopping(
+         monitor="val_wer",  # Metric to monitor
+         mode="min",  # Lower is better
+         stopping_threshold=min_stop,  # Stop if val_wer < min_stop
+         patience=num_keep,  # Epochs to wait while val_wer does not improve
+         verbose=True
+     )
+     # Keep the top 5 checkpoints based on val_wer
+     num_avg = 5
+     save_last = False
+     checkpoint_callback = ModelCheckpoint(
+         dirpath=f"{res_exp_dir}/ckpts",  # Dir of ckpts
+         filename="epoch{epoch}-valwer{val_wer:.4f}",
+         monitor="val_wer",
+         mode="min",
+         save_top_k=num_avg,  # Only keep the 5 best
+         save_last=save_last,  # Also save the last epoch: False
+     )
+     # max_time_training = "00:09:00:00"
+     max_time_training = f"00:{max_hour}:02:00"
+     callback_list = [LossLogger(res_exp_dir),
+                      early_stop_callback, checkpoint_callback]
+     return max_time_training, callback_list
+
+
+ def create_new_trainer(epochs, min_stop, max_hour="09"):
+     # NOTE: Setting
+     max_hour = "09"  # ! Must edit when run
+     log.info(f"Hours to train: {max_hour}")
+     setting = {
+         'num_keep': 500,
+         'precision': 'bf16',  # ! Use AMP
+         'accumulate_grad_batches': 1,
+         'max_hour': max_hour,
+         'enable_progress_bar': False,  # ! Disable the progress bar for shorter logs
+     }
+     # Create callbacks
+     max_time_training, callback_list = create_time_callbacks(
+         num_keep=setting['num_keep'], min_stop=min_stop, max_hour=max_hour)
+     # Training args
+     trainer_dict = {
+         # Hardware
+         'precision': setting['precision'],  # Trade-off
+         'devices': 1,
+         'num_nodes': 1,
+         'accelerator': 'gpu',
+         'strategy': 'auto',  # Single GPU: no multi-GPU strategy
+         # Training
+         'max_epochs': epochs,
+         'accumulate_grad_batches': setting['accumulate_grad_batches'],
+         'gradient_clip_val': 0.0,
+         # Prediction monitor
+         'log_every_n_steps': 100,  # Logging interval within a training epoch
+         'val_check_interval': 1.0,  # Compute WER once per epoch
+         # Unrelated
+         'enable_progress_bar': setting['enable_progress_bar'],
+         'num_sanity_val_steps': 0,
+         'check_val_every_n_epoch': 1,
+         'sync_batchnorm': True,
+         # If True, enables cudnn benchmarking for faster training.
+         'benchmark': False,
+         # Saving and callbacks: new settings for callbacks
+         'enable_checkpointing': True,
+         'max_time': max_time_training,
+         'callbacks': callback_list,
+     }
+     write_txt_exp_dir("args_trainer.txt", trainer_dict)
+     trainer = pl.Trainer(**trainer_dict)
+     return trainer
+
+
+ # ==============================================================================
+
+
+ def reload_nemo_from_avg(best_paths, nemo_model):
+     w_only = False  # NOTE: weights_only=True raised an error, so use False
+     load_strict = False
+
+     def average_checkpoints(paths):
+         avg_state_dict = None
+         for path in paths:
+             ckpt = torch.load(path, map_location="cpu",
+                               weights_only=w_only)["state_dict"]
+             if avg_state_dict is None:
+                 avg_state_dict = {k: v.clone() for k, v in ckpt.items()}
+             else:
+                 for k in avg_state_dict:
+                     # if it's int/bool, leave as-is
+                     if torch.is_floating_point(avg_state_dict[k]):
+                         avg_state_dict[k] += ckpt[k]
+         for k in avg_state_dict:
+             if torch.is_floating_point(avg_state_dict[k]):
+                 avg_state_dict[k] /= len(paths)
+         return avg_state_dict
+
+     # Average
+     log.info(f"\n\nBest paths for AVG(model): {best_paths}")
+     avg_weights = average_checkpoints(best_paths)
+     # Assign averaged weights to the NeMo model
+     nemo_model = nemo_model.to("cuda" if torch.cuda.is_available() else "cpu")
+     nemo_model.load_state_dict(avg_weights, strict=load_strict)
+     return nemo_model, avg_weights
+
+
+ def save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path):
+     torch.save({"state_dict": avg_weights}, avg_ckpt_path)
+     nemo_model.save_to(nemo_model_path)
+     log.info(f"\n\nSaved avg weights (.ckpt) at {avg_ckpt_path}")
+     log.info(f"Saved averaged NeMo model at {nemo_model_path}")
+
+
+ def nemo_inference_for_mfpath(nemo_model, mfpath):
+     def save_gen_list(text_list, gt_list):
+         random_name = ''.join(random.choices(
+             string.ascii_lowercase + string.digits, k=8))
+         file_path = f"{random_name}.csv"
+         # Save under the random name
+         file_path = os.path.join(res_exp_dir, file_path)
+         log.info(f"Saved gen at {file_path}")
+         # Write it as .csv
+         with open(file_path, mode="w", newline="", encoding="utf-8") as f:
+             writer = csv.writer(f)
+             writer.writerow(["Gen", "GT"])  # header
+             for first, second in zip(text_list, gt_list):
+                 writer.writerow([first, second])
+
+     with open(mfpath, "r", encoding="utf-8") as fin:
+         data = [json.loads(line) for line in fin]
+     log.info(f"\n\nLoaded {len(data)} entries from {mfpath}")
+
+     references = []
+     predictions = []
+     for entry in data:  # Limit data if needed
+         ref = entry['text']
+         audio_path = entry['audio_filepath']
+         with torch.no_grad():
+             pred = nemo_model.transcribe(audio_path, verbose=False)[0].text
+         # if use_norm:
+         #     pred = normalize_text_vietnamese(pred)
+         references.append(ref)
+         predictions.append(pred)
+
+     # Compute WER
+     wer_score = wer(references, predictions)
+     log.info(f"WER: {wer_score}")
+
+     # Save the predictions
+     save_gen_list(text_list=predictions, gt_list=references)
+     return wer_score
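
For reference, the jiwer.wer call above computes corpus-level word error rate over paired reference/prediction lists; a standalone sketch with invented toy strings:

from jiwer import wer

# Toy example (invented strings): total word edits / total reference words.
references = ["the cat sat on the mat", "hello world"]
predictions = ["the cat sat on mat", "hello word"]
print(wer(references, predictions))  # 0.25 = 2 errors over 8 reference words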
maintab_fast_conformer_10-09_01-54/code-folder/__pycache__/train.cpython-311.pyc ADDED
Binary file (16 kB).
 
maintab_fast_conformer_10-09_01-54/code-folder/configs/fast_conformer.yaml ADDED
@@ -0,0 +1,271 @@
+ # It contains the default values for training a Fast Conformer-CTC ASR model, large size (~120M), with CTC loss and sub-word encoding.
+
+ # You may find more info about FastConformer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#fast-conformer
+
+ # We suggest using trainer.precision=bf16 for GPUs which support it; otherwise trainer.precision=16 is recommended.
+ # Using bf16 or 16 makes it possible to double the batch size and speed up training/inference. If fp16 is not stable and the model diverges after some epochs, you may use fp32.
+ # Here are the suggested batch sizes per GPU for each precision and memory size:
+ #! fp16 >> 32 --> batch_size = 16
+ # +-----------+------------+------------+
+ # | Precision | GPU Memory | Batch Size |
+ # +===========+============+============+
+ # | 32        | 16GB       | 16         |
+ # |           | 32GB       | 32         |
+ # |           | 80GB       | 64         |
+ # +-----------+------------+------------+
+ # | fp16 or   | 16GB       | 32         |
+ # | bf16      | 32GB       | 64         |
+ # |           | 80GB       | 128        |
+ # +-----------+------------+------------+
+ # Here are the recommended configs for different variants of FastConformer-CTC-BPE; other parameters are the same as in this config file.
+ #
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Model          | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling |
+ # +================+=========+=========+==========+==================+==============+==========================+=================+==========+
+ # | Small (14M)    | 176     | 4       | 16       | 9                | 0.0          | 320                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Medium (32M)   | 256     | 4       | 16       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Large (120M)   | 512     | 8       | 17       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XLarge (616M)  | 1024    | 8       | 24       | 9                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XXLarge (1.2B) | 1024    | 8       | 42       | 5                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+
+ # Note: these are based on the assumption of a max_duration of 20. If you have a longer or shorter max_duration, then batch sizes may need to be updated accordingly.
+
+ # Default learning parameters in this config are set for a global batch size of 2K, while you may use lower values.
+ # To increase the global batch size with a limited number of GPUs, you may use a higher accumulate_grad_batches.
+ # However, accumulate_grad_batches is better avoided as long as the global batch size is large enough and training is stable.
+
+ name: "FastConformer-CTC-BPE"
+ train_path: "V1_Setup/Out/1p/train_ds.json" # ! Must edit when run
+ dev_path: "V1_Setup/Out/1p/test_ds_2x5.json"
+ test_path: "V1_Setup/Out/1p/test_ds.json"
+ vocab_dir: "V1_Setup/Out/1p/" #* correct
+ full_val_path: "V1_Setup/Out/1p/dev_ds.json"
+
+ train_ds_batch_size: 16
+ gen_ds_batch_size: 16
+ ds_max_duration: 32.0 #! Will be replaced
+
+ # Model parameters
+ d_model: 256
+ n_heads: 4
+ n_layers: 16
+ conv_kernel_size: 9
+ xscaling: true
+
+ # Optimize
+ lr: 1e-3 #! Select lr and decay = 0.0, sched, warm_step
+ weight_decay: 1e-3
+ warmup_steps: 15000
+ aug_time_masks: 2 # Changed from 10 for faster, smoother optimization
+
+ # Training parameters # Don't use
+ num_epochs: 1000
+ precision: 32
+ accumulate_grad_batches: 1
+ subsampling_factor: 8
+
+ model:
+   sample_rate: 16000
+   log_prediction: false #! (changed from true to false) enables logging sample predictions in the output during training
+   ctc_reduction: "mean_volume"
+   skip_nan_grad: false
+
+   train_ds:
+     manifest_filepath: ${train_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${train_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: true
+     num_workers: 8
+     pin_memory: true
+     max_duration: ${ds_max_duration} # it is set for LibriSpeech, you may need to update it for your dataset
+     min_duration: 0.1
+     # tarred datasets
+     is_tarred: false
+     tarred_audio_filepaths: null
+     shuffle_n: 2048
+     # bucketing params
+     bucketing_strategy: "fully_randomized"
+     bucketing_batch_size: null
+
+   validation_ds:
+     manifest_filepath: ${dev_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   test_ds:
+     manifest_filepath: ${test_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   # recommend a vocab size of 128 or 256 when training on ~1k hr datasets and a 1k vocab size on 10+k hr datasets
+   # you may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py
+   tokenizer:
+     dir: ${vocab_dir} # path to directory which contains either tokenizer.model (bpe) or vocab.txt (wpe)
+     type: wpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)
+
+   preprocessor:
+     _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
+     sample_rate: ${model.sample_rate}
+     normalize: "per_feature"
+     window_size: 0.025
+     window_stride: 0.01
+     window: "hann"
+     features: 80
+     n_fft: 512
+     log: true
+     frame_splicing: 1
+     dither: 0.00001
+     pad_to: 0
+     pad_value: 0.0
+
+   spec_augment:
+     _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+     freq_masks: 2 # set to zero to disable it
+     # you may use lower time_masks for smaller models to have faster convergence
+     time_masks: ${aug_time_masks} # ! set to zero to disable it, from 10
+     freq_width: 27
+     time_width: 0.05
+
+   encoder:
+     _target_: nemo.collections.asr.modules.ConformerEncoder
+     feat_in: ${model.preprocessor.features}
+     feat_out: -1 # you may set it if you need a different output size than the default d_model
+     n_layers: ${n_layers}
+     d_model: ${d_model}
+
+     # Sub-sampling params
+     subsampling: dw_striding # vggnet, striding, stacking or stacking_norm, dw_striding
+     subsampling_factor: ${subsampling_factor} # ! must be a power of 2 for striding and vggnet
+     subsampling_conv_channels: 256 # -1 sets it to d_model = 256
+     causal_downsampling: false
+
+     # Feed forward module's params
+     ff_expansion_factor: 4
+
+     # Multi-headed Attention Module's params
+     self_attention_model: rel_pos # rel_pos or abs_pos
+     n_heads: ${n_heads} # may need to be lower for smaller d_models
+     # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention
+     att_context_size: [-1, -1] # -1 means unlimited context
+     att_context_style: regular # regular or chunked_limited
+     xscaling: ${xscaling} # scales up the input embeddings by sqrt(d_model)
+     untie_biases: true # unties the biases of the TransformerXL layers
+     pos_emb_max_len: 5000
+     use_pytorch_sdpa: false #! use torch sdpa instead of manual attention
+     use_pytorch_sdpa_backends: [] # empty list means all backends https://pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html e.g. [MATH]
+
+     # Convolution module's params
+     conv_kernel_size: ${conv_kernel_size}
+     conv_norm_type: "batch_norm" # batch_norm or layer_norm or groupnormN (N specifies the number of groups)
+     # conv_context_size can be "causal" or a list of two integers such that conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size
+     # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0]
+     conv_context_size: null
+
+     ### regularization
+     dropout: 0.1 # The dropout used in most of the Conformer Modules
+     dropout_pre_encoder: 0.1 # The dropout used before the encoder
+     dropout_emb: 0.0 # The dropout used for embeddings
+     dropout_att: 0.1 # The dropout for multi-headed attention modules
+
+     # set to non-zero to enable stochastic depth
+     stochastic_depth_drop_prob: 0.0
+     stochastic_depth_mode: linear # linear or uniform
+     stochastic_depth_start_layer: 1
+
+   decoder:
+     _target_: nemo.collections.asr.modules.ConvASRDecoder
+     feat_in: null
+     num_classes: -1
+     vocabulary: []
+
+   # config for InterCTC loss: https://arxiv.org/abs/2102.03216
+   # specify loss weights and which layers to use for InterCTC
+   # e.g., to reproduce the paper results, set loss_weights: [0.3]
+   # and apply_at_layers: [8] (assuming 18 layers). Note that the final
+   # layer loss coefficient is automatically adjusted (to 0.7 in the above example)
+   interctc:
+     loss_weights: []
+     apply_at_layers: []
+
+   optim:
+     name: adamw
+     # lr: 1e-3 #! Select lr and decay, sched, warm_step
+     lr: ${lr}
+     # optimizer arguments
+     betas: [0.9, 0.98]
+     # less need for weight_decay as we already have large augmentations with SpecAug
+     # you may need weight_decay for large models, stable AMP training, small datasets, or when lower augmentations are used
+     # weight decay of 0.0 with lr of 2.0 also works fine
+     weight_decay: ${weight_decay}
+
+     # scheduler setup
+     sched:
+       name: CosineAnnealing
+       # scheduler config override
+       # warmup_steps: 15000
+       warmup_steps: ${warmup_steps}
+       warmup_ratio: null
+       min_lr: 1e-4
+
+ # ==============================================================================
+ # Don't use
+
+ trainer:
+   devices: -1 # number of GPUs, -1 would use all available GPUs
+   num_nodes: 1
+   max_epochs: ${num_epochs}
+   max_steps: -1 # computed at runtime if not set
+   val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for a number of iterations
+   accelerator: auto
+   strategy:
+     _target_: lightning.pytorch.strategies.DDPStrategy
+     gradient_as_bucket_view: true
+   accumulate_grad_batches: ${accumulate_grad_batches}
+   gradient_clip_val: 0.0
+   precision: ${precision} # 16, 32, or bf16
+   log_every_n_steps: 10 # Interval of logging.
+   enable_progress_bar: True
+   num_sanity_val_steps: 0 # number of validation steps to run as a sanity check before training; setting it to 0 disables it
+   check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs
+   sync_batchnorm: true
+   enable_checkpointing: False # Provided by exp_manager
+   logger: false # Provided by exp_manager
+   benchmark: false # needs to be false for models with variable-length speech input as it slows down training
+
+ # ==============================================================================
+
+ exp_manager:
+   exp_dir: null
+   name: ${name}
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   checkpoint_callback_params:
+     # in case of multiple validation sets, the first one is used
+     monitor: "val_wer"
+     mode: "min"
+     save_top_k: 5
+     always_save_nemo: True # saves the checkpoints as .nemo files instead of PTL checkpoints
+
+   resume_from_checkpoint: null # The path to a checkpoint file to continue the training; restores the whole state including the epoch, step, LR schedulers, apex, etc.
+   # you need to set these two to True to continue the training
+   resume_if_exists: false
+   resume_ignore_no_checkpoint: false
+
+   # You may use this section to create a W&B logger
+   create_wandb_logger: false
+   wandb_logger_kwargs:
+     name: null
+     project: null
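
The comments in this config note that the default learning parameters assume a global batch size of ~2K; the effective global batch size of a run is per-GPU batch size × devices × accumulate_grad_batches. A worked example with this config's values (devices=1, as in create_new_trainer in utils.py):

# Global-batch-size arithmetic referenced in the config comments.
per_gpu_batch = 16             # train_ds_batch_size
devices = 1                    # single GPU (see create_new_trainer in utils.py)
accumulate_grad_batches = 1
global_batch = per_gpu_batch * devices * accumulate_grad_batches
print(global_batch)  # 16 -- far below the ~2K the default LR schedule assumes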
maintab_fast_conformer_10-09_01-54/code-folder/train.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ # Have first: V0 -> this
8
+ if True:
9
+ sys.path.append(os.getcwd())
10
+ if True:
11
+ from V0_Import.import_src import *
12
+ if True:
13
+ from V2_Run.Fast_conformer_nemo.utils.utils import *
14
+ run_import_src = True
15
+
16
+
17
+ # ==============================================================================
18
+ # Ultil functions
19
+ # Dont need to edit, please
20
+
21
+
22
+ def set_all_seeds(seed=42):
23
+ random.seed(seed)
24
+ np.random.seed(seed)
25
+ torch.manual_seed(seed)
26
+ torch.cuda.manual_seed_all(seed)
27
+
28
+
29
+ def get_config_nemo():
30
+ # config_path = "Fast_conformer_nemo/configs/fast_conformer.yaml"
31
+ # if len(sys.argv) >= 2:
32
+ # config_path = sys.argv[1]
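+ # config_path is a module-level global pulled in by the star import from utils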
33
+ print(config_path, flush=True)
34
+ with open(config_path, "r", encoding="utf-8") as f:
35
+ yaml_text = f.read().strip()
36
+
37
+ _config = OmegaConf.create(yaml_text)
38
+ config_dict = OmegaConf.to_container(_config, resolve=True)
39
+ print(f"\n{config_dict['model']}\n{config_dict}", flush=True)
40
+ return config_dict
41
+
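+ # Note: OmegaConf.to_container(..., resolve=True) expands interpolations such as
+ # "${num_epochs}" in the YAML into concrete values before the dict is used.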
42
+
43
+ def get_train_dev_test(path):  # loads one JSONL manifest (train is full, dev is 300, test is full)
44
+ data = []
45
+ print(path, flush=True)
46
+ with open(path, "r", encoding="utf-8") as f:
47
+ for line in f:
48
+ if line.strip():
49
+ dic = json.loads(line)
50
+ data.append(dic)
51
+ return data
52
+
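+ # Each manifest line is one JSON object; an illustrative (hypothetical) entry:
+ # {"audio_filepath": "clips/0001.wav", "duration": 3.2, "text": "016 017 100"}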
53
+
54
+ def get_info(train_path):
55
+ data = []
56
+ max_dur = 0.0
57
+ max_lens = []
58
+ words = []
59
+ with open(train_path, "r", encoding="utf-8") as f:  # same loading loop as above
60
+ for line in f:
61
+ if line.strip():
62
+ dic = json.loads(line)
63
+ data.append(dic)
64
+ max_dur = max(max_dur, dic["duration"])
65
+ max_lens.append(len(str(dic["text"]).split()))
66
+ words.extend(str(dic["text"]).split())
67
+
68
+ words = sorted(set(words))  # dedupe first, then sort
69
+ return data, max_dur, max_lens, len(words)
70
+
71
+
72
+ # ==============================================================================
73
+ # Set global vars and data
74
+ if run_import_src:
75
+ set_all_seeds(42) # Set random seed for reproducibility
76
+ config_dict = get_config_nemo() # Get from path
77
+ data, max_dur, max_lens, len_vocab = get_info(config_dict['train_path'])
78
+ sample_rate = 16000
79
+ dev_data, test_data = get_train_dev_test(
80
+ config_dict['dev_path']), get_train_dev_test(config_dict['test_path'])
81
+ full_val_data = get_train_dev_test(config_dict['full_val_path'])
82
+
83
+ log.info("Overall checking:")
84
+ log.info(f"Number of samples in manifest: {len(data)}")
85
+ log.info(
86
+ f"- Max duration in manifest: {max_dur:.2f} seconds, sample rate: {sample_rate}")
87
+ log.info(
88
+ f"- Vocab in manifest: {len_vocab} units, max length (L tokens): {np.mean(max_lens):.2f}, {max(max_lens)}")
89
+ log.info(f"Number of dev is {len(dev_data)}, test is {len(test_data)}")
90
+
91
+
92
+ # ==============================================================================
93
+ # Create params, conf_model, and manifest paths, then adjust settings
94
+ if run_import_src:
95
+ params = copy.deepcopy(config_dict)['model']
96
+ train_mfpath = os.path.join(res_exp_dir, "_train_mf.json")
97
+ dev_mfpath = os.path.join(res_exp_dir, "_dev_mf.json")
98
+
99
+ for ds in ['train_ds', 'validation_ds']: # Change train and dev temporarily
100
+ if ds == 'train_ds':
101
+ params[ds]['manifest_filepath'] = train_mfpath
102
+ params[ds]['max_duration'] = round(max_dur + 0.1, 2)
103
+ else:
104
+ params[ds]['manifest_filepath'] = dev_mfpath
105
+ log.info(
106
+ f"Changed params['train_ds']['max_duration'] = {round(max_dur + 0.1, 2)} seconds \nand {train_mfpath}, {dev_mfpath} to write data!")
107
+
108
+ # Create an OmegaConf object from the dictionary
109
+ conf_model = OmegaConf.create(params)
110
+ write_txt_exp_dir("conf_model.txt", conf_model)
111
+ # Capture the git commit, Python version, and pip freeze output as one string
112
+ pip_freeze_str = subprocess.check_output(
113
+ ["git", "log", "-1",
114
+ '--pretty=format:%h %s%nAuthor date: %ad%nCommit date: %cd', "--date=iso"],
115
+ text=True
116
+ ) + "\n"
117
+ pip_freeze_str += subprocess.check_output(
118
+ ["python", "--version"], text=True) + "\n"
119
+ pip_freeze_str += subprocess.check_output(
120
+ ["pip", "freeze"], text=True) + "\n"
121
+ write_txt_exp_dir("git_pip_env.txt", pip_freeze_str)
122
+
123
+ # Some for training info
124
+ log.info(f"train_ds_batch_size: {config_dict['train_ds_batch_size']}")
125
+ log.info(
126
+ f"warmup_steps: {config_dict['warmup_steps']}, lr: {config_dict['lr']}, weight_decay: {config_dict['weight_decay']}")
127
+ # log.info(f"subsampling_factor: {config_dict['subsampling_factor']}")
128
+
129
+
130
+ # ==============================================================================
131
+ # Create subset from part and write it to global mfpath
132
+
133
+
134
+ def create_train_dev(root_train_data: list, begin_idx, end_idx, num_train, num_dev, root_dev_data=None):
135
+ global train_mfpath, dev_mfpath
136
+
137
+ def get_min_max_duration(data):
138
+ mi, ma = 100.0, 0.0
139
+ for dic in data:
140
+ mi = min(mi, dic["duration"])
141
+ ma = max(ma, dic["duration"])
142
+ return mi, ma
143
+
144
+ def split_data(data: list, k):
145
+ if k >= len(data):
146
+ return data
147
+ _train_data, _test_data = train_test_split(
148
+ data, test_size=k, random_state=42
149
+ )
150
+ return _test_data
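+ # Behavior sketch: split_data(data, 100) returns 100 items drawn reproducibly
+ # (random_state=42) from data, or data itself when k >= len(data).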
151
+
152
+ # # Sort by distribution
153
+ # data = list(sorted(root_train_data, key=lambda x: int(
154
+ # os.path.basename(x["audio_filepath"]).split("_")[0])))
155
+
156
+ # Select part
157
+ data = root_train_data.copy()
158
+ begin_idx = max(0, begin_idx)
159
+ end_idx = min(end_idx, len(data))
160
+ data = data[begin_idx:end_idx]
161
+ log.info(f"- Duration of this part: [{get_min_max_duration(data)}]")
162
+
163
+ # Select random subset/set from part: train from data, and dev from train or is root_dev
164
+ from tqdm import tqdm
165
+ num_train = min(num_train, len(data))
166
+ train_data = split_data(data, num_train)
167
+ if root_dev_data is None:
168
+ num_dev = min(num_dev, num_train)
169
+ dev_data = split_data(train_data, num_dev)
170
+ else:
171
+ dev_data = list(root_dev_data).copy()
172
+
173
+ # Write subset data back to a new file (or overwrite)
174
+ dev_in_train = root_dev_data is None
175
+ log.info(
176
+ f"- Number of train is {len(train_data)}, dev is {len(dev_data)}, dev in train: {dev_in_train}")
177
+ log.info(f"\n{train_data[0]}\n{dev_data[0]}\n")
178
+
179
+ with open(train_mfpath, "w", encoding="utf-8") as fout:
180
+ for item in train_data:
181
+ fout.write(json.dumps(item, ensure_ascii=False) + "\n")
182
+
183
+ with open(dev_mfpath, "w", encoding="utf-8") as fout:
184
+ for item in dev_data:
185
+ fout.write(json.dumps(item, ensure_ascii=False) + "\n")
186
+
187
+
188
+ # ==============================================================================
189
+ # Demo the NeMo model first
190
+ def init_nemo_model(data, dev_data, conf_model):
191
+ log.info(f"\n\nInit nemo model:")
192
+ create_train_dev(root_train_data=data, begin_idx=0, end_idx=len(data), num_train=len(
193
+ data), num_dev=-1, root_dev_data=dev_data)  # Demo data creation
194
+ trainer = create_new_trainer(epochs=1000, min_stop=0.0)  # Demo trainer
195
+
196
+ # trainer.fit(nemo_model)
197
+ nemo_model = nemo_asr.models.EncDecCTCModelBPE(
198
+ cfg=conf_model, trainer=trainer)
199
+
200
+ # print(nemo_model)
201
+ summary = ModelSummary(nemo_model)
202
+ print(summary)
203
+ write_txt_exp_dir("model_mp.txt", summary) # Write MP of model
204
+ check_tokenizer(nemo_model)
205
+ return nemo_model
206
+
207
+
208
+ def check_tokenizer(nemo_model):
209
+ # Inspect the tokenizer inside your model
210
+ print("Tokenizer type:", nemo_model.tokenizer.__class__.__name__)
211
+ print("Vocab size:", nemo_model.tokenizer.vocab_size)
212
+
213
+ # Show first few vocab tokens
214
+ # if hasattr(nemo_model.tokenizer, "vocab"):
215
+ # print("First tokens:", list(nemo_model.tokenizer.vocab.keys())[:20])
216
+
217
+ # Test text → ids → text round-trip
218
+ sample = "016 017 100"
219
+ ids = nemo_model.tokenizer.text_to_ids(sample)
220
+ print("Text:", sample)
221
+ print("IDs:", ids)
222
+ print("Back to text:", nemo_model.tokenizer.ids_to_text(ids))
223
+
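+ # Illustrative round-trip output (the IDs are hypothetical; they depend on the
+ # trained WPE vocab):
+ # Text: 016 017 100
+ # IDs: [41, 7, 112]
+ # Back to text: 016 017 100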
224
+
225
+ # ==============================================================================
226
+ # Train model: Multi part training
227
+
228
+
229
+ def train_multi_turn(train_data, dev_data):
230
+ # Init nemo model
231
+ set_all_seeds(42)
232
+ global params, conf_model, res_exp_dir
233
+ nemo_model = init_nemo_model(
234
+ data=train_data, dev_data=dev_data, conf_model=conf_model)
235
+
236
+ # ! Setting for multi part training
237
+ train_data = list(sorted(train_data, key=lambda x: x["duration"]))
238
+
239
+ cnt = len(train_data)
240
+ setting = {
241
+ "begin_idx": [0, 0],
242
+ "end_idx": [5000, cnt],
243
+ "num_train": [1000, cnt],
244
+ "num_dev": [100, -1],
245
+ "epochs": [200, 1000], # ! Not run enough epochs
246
+ "min_stop": [0.4, 0.0] # ! 0.4 --> 0.25
247
+ }
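+ # Reading the schedule: part 0 warms up on 1000 of the 5000 shortest clips
+ # (train_data is sorted by duration) with a 100-sample dev split, stopping once
+ # val_wer < 0.4; part 1 then trains on the full set against the external dev
+ # data, with a threshold of 0.0 so it runs to the epoch/time limit.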
248
+ num_part = len(setting["begin_idx"])
249
+ trainer = None
250
+
251
+ for i in range(num_part):
252
+ begin_idx = setting["begin_idx"][i]
253
+ end_idx = setting["end_idx"][i]
254
+ num_train = setting["num_train"][i]
255
+ num_dev = setting["num_dev"][i]
256
+ num_epochs = setting["epochs"][i]
257
+ min_stop = setting["min_stop"][i]
258
+ log.info(
259
+ f"\n\n Here {i}: {begin_idx} --> {end_idx} | {num_train}, {num_dev}, {cnt} | {num_epochs}, {min_stop}")
260
+
261
+ # Create train dev file
262
+ if num_dev <= 0:
263
+ create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
264
+ num_train=num_train, num_dev=num_dev, root_dev_data=dev_data)
265
+ else:
266
+ create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
267
+ num_train=num_train, num_dev=num_dev)
268
+
269
+ # Create a fresh trainer
270
+ trainer = create_new_trainer(
271
+ epochs=num_epochs, min_stop=min_stop)
272
+
273
+ nemo_model.setup_training_data(
274
+ train_data_config=params['train_ds']) # Reload it
275
+ nemo_model.setup_validation_data(
276
+ val_data_config=params['validation_ds']) # Reload it
277
+ trainer.fit(nemo_model) # Fit
278
+
279
+ # Save it temporarily
280
+ save_path = os.path.join(res_exp_dir, f"_nemo_model_part_{i}.nemo")
281
+ nemo_model.save_to(save_path)
282
+ return trainer, nemo_model
283
+
284
+
285
+ # ==============================================================================
286
+ # No need to edit this section, please
287
+
288
+
289
+ def run_main_in_notebook():
290
+ global data, dev_data, res_exp_dir
291
+ trainer, nemo_model = train_multi_turn(data, dev_data) # Train with data
292
+
293
+ # ==========================================================================
294
+ # Get paths from checkpoint callback
295
+ # The checkpoint callback is last in the list because it was added last
296
+ ckpt_callback = trainer.callbacks[-1]
297
+ best_paths = list(ckpt_callback.best_k_models.keys())
298
+
299
+ # Reload and save
300
+ nemo_model, avg_weights = reload_nemo_from_avg(
301
+ best_paths=best_paths, nemo_model=nemo_model)
302
+ avg_ckpt_path = f"{res_exp_dir}/model_avg.ckpt"
303
+ nemo_model_path = f"{res_exp_dir}/nemo_model_avg.nemo"
304
+ save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path)
305
+
306
+ # Gen, score and save for mfpath
307
+ nemo_inference_for_mfpath(nemo_model, config_dict['dev_path'])
308
+ nemo_inference_for_mfpath(nemo_model, config_dict['test_path'])
309
+ nemo_inference_for_mfpath(nemo_model, config_dict['full_val_path'])
310
+ push_exp_dir_to_hub(res_exp_dir)
311
+ return trainer, nemo_model
312
+
313
+
314
+ if __name__ == "__main__":
315
+ run_main_in_notebook()
maintab_fast_conformer_10-09_01-54/code-folder/utils/__pycache__/utils.cpython-311.pyc ADDED
Binary file (15.9 kB).
maintab_fast_conformer_10-09_01-54/code-folder/utils/install_cmd.txt ADDED
@@ -0,0 +1,22 @@
1
+ run_cmd("pip install -q datasets==3.6.0")
2
+ run_cmd("pip install -q numpy==1.26.4")
3
+ run_cmd("pip install -q scipy==1.15.2")
4
+ run_cmd("pip install -q omegaconf==2.3.0")
5
+
6
+
7
+ run_cmd("pip install -q torch==2.6.0+cu124")
8
+ run_cmd("pip install -q accelerate==1.5.2")
9
+ run_cmd("pip install -q lightning==2.4.0")
10
+ run_cmd("pip install -q lightning-utilities==0.14.3")
11
+ run_cmd("pip install -q pytorch-lightning==2.5.1.post0")
12
+ run_cmd("pip install -q jiwer==4.0.0")
13
+
14
+
15
+ run_cmd("apt-get update && apt-get install -y libsndfile1 ffmpeg")
16
+ run_cmd("pip install Cython")
17
+ run_cmd("pip install packaging")
18
+ run_cmd("pip -q install nemo_toolkit['asr']")
19
+
20
+
21
+ # NOTE: Special for env
22
+ run_cmd("pip install python-dotenv")
maintab_fast_conformer_10-09_01-54/code-folder/utils/utils.py ADDED
@@ -0,0 +1,283 @@
1
+ import copy
2
+ import csv
3
+ import glob
4
+ import json
5
+ import logging
6
+ import logging as log
7
+ import os
8
+ import random
9
+ import re
10
+ import shutil
11
+ import string
12
+ import sys
13
+ import unicodedata
14
+
15
+ import jiwer
16
+ import lightning.pytorch as pl
17
+ import nemo
18
+ import nemo.collections.asr as nemo_asr
19
+ import numpy as np
20
+ import torch
21
+ from datasets import load_dataset
22
+ from jiwer import wer
23
+ from lightning.pytorch.callbacks import Callback, EarlyStopping, ModelCheckpoint
24
+ from lightning.pytorch.utilities.model_summary import ModelSummary
25
+ from omegaconf import OmegaConf
26
+ from scipy.io import wavfile
27
+
28
+ # * V0 -> this
29
+ from V0_Import.import_src import push_file_to_hub
30
+
31
+ # No need to edit this section, please
32
+
33
+
34
+ class LossLogger(Callback):
35
+ def __init__(self, exp_dir):
36
+ super().__init__()
37
+ self.train_losses = []
38
+ self.val_losses = []
39
+ self.train_wer = []
40
+ self.val_wer = []
41
+ self.num_last = 100  # ? how many recent epochs to plot
42
+ self.num_plot = 100  # ? plot every this many epochs
43
+ self.allow_show_plot = False  # ? show plots inline in a notebook instead of saving
44
+ self.exp_dir = exp_dir
45
+
46
+ def on_train_epoch_end(self, trainer, pl_module):
47
+ log.info(trainer.callback_metrics)
48
+
49
+ train_loss = trainer.callback_metrics.get('train_loss')
50
+ epoch_idx = trainer.current_epoch
51
+ lr = trainer.optimizers[0].param_groups[0]['lr'] # Print lr
52
+ log.info(f"Epoch {epoch_idx} ended." + "=" * 100)
53
+ if train_loss is not None:
54
+ self.train_losses.append(train_loss.item())
55
+ log.info(f"Train Loss: {train_loss.item()}, lr: {lr}")
56
+
57
+ if epoch_idx != 0 and epoch_idx % self.num_plot == 0:
58
+ self._plot_train()
59
+
60
+ def on_validation_epoch_end(self, trainer, pl_module):
61
+ val_loss = trainer.callback_metrics.get('val_loss')
62
+ val_wer = trainer.callback_metrics.get('val_wer')
63
+ if val_loss is not None:
64
+ self.val_losses.append(val_loss.item())
65
+ log.info(f"Validation Loss: {val_loss.item()}")
66
+ if val_wer is not None:
67
+ self.val_wer.append(val_wer.item())
68
+ log.info(f"Validation WER: {val_wer.item()}")
69
+
70
+ def _plot_train(self):
71
+ import matplotlib.pyplot as plt
72
+ plt.figure(figsize=(10, 6))
73
+ plt.subplot(2, 1, 1)
74
+ num = self.num_last
75
+ plt.plot(self.train_losses[-num:], label='Training Loss')
76
+ plt.plot(self.val_losses[-num:], label='Validation Loss')
77
+ plt.xlabel('Epoch')
78
+ plt.ylabel('Loss')
79
+ plt.legend()
80
+ plt.title('Training and Validation Loss')
81
+
82
+ plt.subplot(2, 1, 2)
83
+ plt.plot(self.train_wer[-num:], label='Training WER')
84
+ plt.plot(self.val_wer[-num:], label='Validation WER')
85
+ plt.xlabel('Epoch')
86
+ plt.ylabel('WER')
87
+ plt.legend()
88
+ plt.title('Training and Validation WER')
89
+ plt.tight_layout()
90
+ # allow_show_plot = True # Allow show plot in notebook
91
+ if self.allow_show_plot:
92
+ plt.show()
93
+ else:
94
+ plot_png = os.path.join(
95
+ self.exp_dir, f"training_process_{len(self.val_wer)}.png")
96
+ plt.savefig(plot_png)
97
+ push_file_to_hub(plot_png)
98
+
99
+ def on_train_end(self, trainer, pl_module):
100
+ self.num_last = len(self.val_wer)
101
+ self._plot_train()
102
+
103
+
104
+ config_path = "V2_Run/Fast_conformer_nemo/configs/fast_conformer.yaml" # ? NOTE: Setting
105
+ res_exp_dir = "maintab_fast_conformer" # ? NOTE: Setting
106
+ src_folder = "V2_Run/Fast_conformer_nemo" # ? NOTE: Setting
107
+ os.makedirs(res_exp_dir, exist_ok=True)
108
+ dst_folder = os.path.join(res_exp_dir, "code-folder")
109
+ shutil.copytree(src_folder, dst_folder, dirs_exist_ok=True)
110
+ log.info(f"Copied code to {dst_folder}")
111
+
112
+
113
+ def write_txt_exp_dir(name, var):
114
+ path = os.path.join(res_exp_dir, name)
115
+ with open(path, "w", encoding="utf-8") as f:
116
+ f.write(str(var))  # the with block closes the file automatically
118
+
119
+
120
+ # ==============================================================================
121
+
122
+
123
+ def create_time_callbacks(num_keep, min_stop, max_hour):
124
+ # num_keep = 500
125
+ early_stop_callback = EarlyStopping(
126
+ monitor="val_wer", # Metric to monitor
127
+ mode="min", # Lower is better
128
+ stopping_threshold=min_stop,  # Stop once val_wer falls below this value
129
+ patience=num_keep,  # Epochs without improvement before stopping
130
+ verbose=True
131
+ )
132
+ # Keep top 5 checkpoints based on val_wer
133
+ num_avg = 5
134
+ save_last = False
135
+ checkpoint_callback = ModelCheckpoint(
136
+ dirpath=f"{res_exp_dir}/ckpts", # Dir of ckpts
137
+ filename="epoch{epoch}-valwer{val_wer:.4f}",
138
+ monitor="val_wer",
139
+ mode="min",
140
+ save_top_k=num_avg, # Only keep 5 best
141
+ save_last=save_last, # Also save last epoch: False
142
+ )
143
+ # max_time_training = "00:09:00:00"
144
+ max_time_training = f"00:{max_hour}:05:00" # ! Minutes
145
+ callback_list = [LossLogger(res_exp_dir),
146
+ early_stop_callback, checkpoint_callback]
147
+ return max_time_training, callback_list
148
+
149
+
150
+ def create_new_trainer(epochs, min_stop, max_hour="09"):
151
+ # NOTE: Setting
152
+ max_hour = "09" # ! Must edit when run
153
+ log.info(f"Hour to train is {max_hour}")
154
+ setting = {
155
+ 'num_keep': 500,
156
+ 'precision': 'bf16', # ! Use AMP
157
+ # 'precision': 32,
158
+ 'accumulate_grad_batches': 1,
159
+ 'max_hour': max_hour,
160
+ 'enable_progress_bar': False,  # disable the progress bar to keep logs short
161
+ }
162
+ log.info(f"Precision to train is {setting['precision']}")
163
+ # Batch size = 16 x accumulate_grad_batches
164
+ log.info(
165
+ f"Grad batch size to train is x{setting['accumulate_grad_batches']}")
166
+ # Create callbacks
167
+ max_time_training, callback_list = create_time_callbacks(
168
+ num_keep=setting['num_keep'], min_stop=min_stop, max_hour=max_hour)
169
+ # Training args
170
+ trainer_dict = {
171
+ # Hardware
172
+ 'precision': setting['precision'], # Trade-off
173
+ 'devices': 1,
174
+ 'num_nodes': 1,
175
+ 'accelerator': 'gpu',
176
+ 'strategy': 'auto',  # must stay 'auto': single GPU only
177
+ # Training
178
+ 'max_epochs': epochs,
179
+ 'accumulate_grad_batches': setting['accumulate_grad_batches'],
180
+ 'gradient_clip_val': 0.0,
181
+ # Prediction monitor
182
+ 'log_every_n_steps': 100,  # Logging interval within a training epoch
183
+ 'val_check_interval': 1.0,  # Compute WER once per epoch
184
+ # Miscellaneous
185
+ 'enable_progress_bar': setting['enable_progress_bar'],
186
+ 'num_sanity_val_steps': 0,
187
+ 'check_val_every_n_epoch': 1,
188
+ 'sync_batchnorm': True,
190
+ # If True, enables cudnn benchmarking for speed; keep False for variable-length speech input.
191
+ 'benchmark': False,
191
+ # Saving and callback: New setting for callbacks
192
+ 'enable_checkpointing': True,
193
+ 'max_time': max_time_training,
194
+ 'callbacks': callback_list,
195
+ }
196
+ write_txt_exp_dir("args_trainer.txt", trainer_dict)
197
+ trainer = pl.Trainer(**trainer_dict)
198
+ return trainer
199
+
200
+
201
+ # ==============================================================================
202
+ # Dont need to edit, please
203
+
204
+
205
+ def reload_nemo_from_avg(best_paths, nemo_model):
206
+ w_only = False  # NOTE: weights_only=True raises an error here, so keep it False
207
+ load_strict = False
208
+
209
+ def average_checkpoints(paths):
210
+ avg_state_dict = None
211
+ for path in paths:
212
+ ckpt = torch.load(path, map_location="cpu",
213
+ weights_only=w_only)["state_dict"]
214
+ if avg_state_dict is None:
215
+ avg_state_dict = {k: v.clone() for k, v in ckpt.items()}
216
+ else:
217
+ for k in avg_state_dict:
218
+ # if it's int/bool, leave as-is
219
+ if torch.is_floating_point(avg_state_dict[k]):
220
+ avg_state_dict[k] += ckpt[k]
221
+ for k in avg_state_dict:
222
+ if torch.is_floating_point(avg_state_dict[k]):
223
+ avg_state_dict[k] /= len(paths)
224
+ return avg_state_dict
225
+
226
+ # Average
227
+ log.info(f"\n\nBest paths for AVG(model): {best_paths}")
228
+ avg_weights = average_checkpoints(best_paths)
229
+ # Assign averaged weights to NeMo model
230
+ nemo_model = nemo_model.to("cuda" if torch.cuda.is_available() else "cpu")
231
+ nemo_model.load_state_dict(avg_weights, strict=load_strict)
232
+ return nemo_model, avg_weights
233
+
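+ # Arithmetic check for the averaging above: if two checkpoints hold 0.2 and 0.6
+ # for some key, the result is (0.2 + 0.6) / 2 = 0.4; non-float buffers are
+ # skipped by the is_floating_point guard and keep the first checkpoint's values.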
234
+
235
+ def save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path):
236
+ torch.save({"state_dict": avg_weights}, avg_ckpt_path)
237
+ nemo_model.save_to(nemo_model_path)
238
+ log.info(f"\n\nSaved avg weights (.ckpt) at {avg_ckpt_path}")
239
+ log.info(f"Saved averaged NeMo model at {nemo_model_path}")
240
+
241
+
242
+ def nemo_inference_for_mfpath(nemo_model, mfpath):
243
+ def save_gen_list(text_list, gt_list, name_list):
244
+ random_name = ''.join(random.choices(string.digits, k=8))
245
+ file_path = f"{random_name}.csv"
246
+ # Place the randomly named file in the experiment dir
247
+ file_path = os.path.join(res_exp_dir, file_path)
248
+ log.info(f"Saved gen at {file_path}")
249
+ # Write it as .csv
250
+ with open(file_path, mode="w", newline="", encoding="utf-8") as f:
251
+ writer = csv.writer(f)
252
+ writer.writerow(["Gen", "GT", "Name"]) # header
253
+ for first, second, name in zip(text_list, gt_list, name_list):
254
+ writer.writerow([first, second, name])
255
+ return file_path
256
+
257
+ with open(mfpath, "r", encoding="utf-8") as fin:
258
+ data = [json.loads(line) for line in fin]
259
+ log.info(f"\n\nLoaded {len(data)} entries from {mfpath}")
260
+
261
+ references = []
262
+ predictions = []
263
+ names = []
264
+ from tqdm import tqdm
265
+ for entry in data:  # limit data here if needed
266
+ ref = entry['text']
267
+ audio_path = entry['audio_filepath']
268
+ with torch.no_grad():
269
+ pred = nemo_model.transcribe(audio_path, verbose=False)[0].text
270
+ # if use_norm:
271
+ # pred = normalize_text_vietnamese(pred)
272
+ references.append(ref)
273
+ predictions.append(pred)
274
+ names.append(os.path.basename(audio_path))
275
+
276
+ # Compute WER
277
+ wer_score = wer(references, predictions)
278
+ log.info(f"WER: {wer_score}")
279
+
280
+ # Save pred
281
+ df_path = save_gen_list(text_list=predictions,
282
+ gt_list=references, name_list=names)
283
+ return wer_score
maintab_fast_conformer_10-09_01-54/conf_model.txt ADDED
@@ -0,0 +1 @@
1
+ {'sample_rate': 16000, 'log_prediction': False, 'ctc_reduction': 'mean_volume', 'skip_nan_grad': False, 'train_ds': {'manifest_filepath': 'maintab_fast_conformer/_train_mf.json', 'sample_rate': 16000, 'batch_size': 16, 'shuffle': True, 'num_workers': 8, 'pin_memory': True, 'max_duration': 32.36, 'min_duration': 0.1, 'is_tarred': False, 'tarred_audio_filepaths': None, 'shuffle_n': 2048, 'bucketing_strategy': 'fully_randomized', 'bucketing_batch_size': None}, 'validation_ds': {'manifest_filepath': 'maintab_fast_conformer/_dev_mf.json', 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}, 'test_ds': {'manifest_filepath': 'V1_Setup/Out/1p/test_ds.json', 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}, 'tokenizer': {'dir': 'V1_Setup/Out/1p/', 'type': 'wpe'}, 'preprocessor': {'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_size': 0.025, 'window_stride': 0.01, 'window': 'hann', 'features': 80, 'n_fft': 512, 'log': True, 'frame_splicing': 1, 'dither': 1e-05, 'pad_to': 0, 'pad_value': 0.0}, 'spec_augment': {'_target_': 'nemo.collections.asr.modules.SpectrogramAugmentation', 'freq_masks': 2, 'time_masks': 2, 'freq_width': 27, 'time_width': 0.05}, 'encoder': {'_target_': 'nemo.collections.asr.modules.ConformerEncoder', 'feat_in': 80, 'feat_out': -1, 'n_layers': 16, 'd_model': 256, 'subsampling': 'dw_striding', 'subsampling_factor': 8, 'subsampling_conv_channels': 256, 'causal_downsampling': False, 'ff_expansion_factor': 4, 'self_attention_model': 'rel_pos', 'n_heads': 4, 'att_context_size': [-1, -1], 'att_context_style': 'regular', 'xscaling': True, 'untie_biases': True, 'pos_emb_max_len': 5000, 'use_pytorch_sdpa': False, 'use_pytorch_sdpa_backends': [], 'conv_kernel_size': 9, 'conv_norm_type': 'batch_norm', 'conv_context_size': None, 'dropout': 0.1, 'dropout_pre_encoder': 0.1, 'dropout_emb': 0.0, 'dropout_att': 0.1, 'stochastic_depth_drop_prob': 0.0, 'stochastic_depth_mode': 'linear', 'stochastic_depth_start_layer': 1}, 'decoder': {'_target_': 'nemo.collections.asr.modules.ConvASRDecoder', 'feat_in': None, 'num_classes': -1, 'vocabulary': []}, 'interctc': {'loss_weights': [], 'apply_at_layers': []}, 'optim': {'name': 'adamw', 'lr': 0.001, 'betas': [0.9, 0.98], 'weight_decay': 0.001, 'sched': {'name': 'CosineAnnealing', 'warmup_steps': 15000, 'warmup_ratio': None, 'min_lr': 0.0001}}}
maintab_fast_conformer_10-09_01-54/git_pip_env.txt ADDED
@@ -0,0 +1,895 @@
1
+ a28ce37 run 1p fast conformer
2
+ Author date: 2025-10-08 22:16:18 +0700
3
+ Commit date: 2025-10-08 22:16:18 +0700
4
+ Python 3.11.13
5
+
6
+ absl-py==1.4.0
7
+ accelerate==1.5.2
8
+ aiofiles==22.1.0
9
+ aiohappyeyeballs==2.6.1
10
+ aiohttp==3.12.13
11
+ aiosignal==1.3.2
12
+ aiosqlite==0.21.0
13
+ alabaster==1.0.0
14
+ albucore==0.0.24
15
+ albumentations==2.0.8
16
+ ale-py==0.11.1
17
+ alembic==1.16.2
18
+ altair==5.5.0
19
+ annotated-types==0.7.0
20
+ annoy==1.17.3
21
+ ansicolors==1.1.8
22
+ antlr4-python3-runtime==4.9.3
23
+ anyio==4.9.0
24
+ argon2-cffi==25.1.0
25
+ argon2-cffi-bindings==21.2.0
26
+ args==0.1.0
27
+ array_record==0.7.2
28
+ arrow==1.3.0
29
+ arviz==0.21.0
30
+ astropy==7.1.0
31
+ astropy-iers-data==0.2025.6.23.0.39.50
32
+ asttokens==3.0.0
33
+ astunparse==1.6.3
34
+ atpublic==5.1
35
+ attrs==25.3.0
36
+ audioread==3.0.1
37
+ autograd==1.8.0
38
+ babel==2.17.0
39
+ backcall==0.2.0
40
+ backports.tarfile==1.2.0
41
+ bayesian-optimization==3.0.0
42
+ beartype==0.21.0
43
+ beautifulsoup4==4.13.4
44
+ betterproto==2.0.0b6
45
+ bigframes==2.8.0
46
+ bigquery-magics==0.9.0
47
+ bitsandbytes==0.46.0
48
+ bleach==6.2.0
49
+ blinker==1.9.0
50
+ blis==1.3.0
51
+ blobfile==3.0.0
52
+ blosc2==3.5.0
53
+ bokeh==3.7.3
54
+ Boruta==0.4.3
55
+ boto3==1.39.1
56
+ botocore==1.39.1
57
+ Bottleneck==1.4.2
58
+ -e git+https://github.com/SohierDane/BigQuery_Helper@8615a7f6c1663e7f2d48aa2b32c2dbcb600a440f#egg=bq_helper
59
+ bqplot==0.12.45
60
+ braceexpand==0.1.7
61
+ branca==0.8.1
62
+ build==1.2.2.post1
63
+ CacheControl==0.14.3
64
+ cachetools==5.5.2
65
+ Cartopy==0.24.1
66
+ catalogue==2.0.10
67
+ catboost==1.2.8
68
+ category_encoders==2.7.0
69
+ certifi==2025.6.15
70
+ cesium==0.12.4
71
+ cffi==1.17.1
72
+ chardet==5.2.0
73
+ charset-normalizer==3.4.2
74
+ Chessnut==0.4.1
75
+ chex==0.1.89
76
+ clarabel==0.11.1
77
+ click==8.2.1
78
+ click-plugins==1.1.1.2
79
+ cligj==0.7.2
80
+ clint==0.5.1
81
+ cloudpathlib==0.21.1
82
+ cloudpickle==3.1.1
83
+ cmake==3.31.6
84
+ cmdstanpy==1.2.5
85
+ colorama==0.4.6
86
+ colorcet==3.1.0
87
+ colorlog==6.9.0
88
+ colorlover==0.3.0
89
+ colour==0.1.5
90
+ comm==0.2.2
91
+ community==1.0.0b1
92
+ confection==0.1.5
93
+ cons==0.4.6
94
+ contourpy==1.3.2
95
+ coverage==7.9.1
96
+ cramjam==2.10.0
97
+ cryptography==44.0.3
98
+ cuda-bindings==12.9.0
99
+ cuda-python==12.9.0
100
+ cudf-cu12==25.2.2
101
+ cudf-polars-cu12==25.2.2
102
+ cufflinks==0.17.3
103
+ cuml-cu12==25.2.1
104
+ cupy-cuda12x==13.4.1
105
+ curl_cffi==0.11.4
106
+ cuvs-cu12==25.2.1
107
+ cvxopt==1.3.2
108
+ cvxpy==1.6.6
109
+ cycler==0.12.1
110
+ cyipopt==1.5.0
111
+ cymem==2.0.11
112
+ Cython==3.0.12
113
+ cytoolz==1.0.1
114
+ daal==2025.6.1
115
+ dacite==1.9.2
116
+ dask==2024.12.1
117
+ dask-cuda==25.2.0
118
+ dask-cudf-cu12==25.2.2
119
+ dask-expr==1.1.21
120
+ dataclasses-json==0.6.7
121
+ dataproc-spark-connect==0.7.5
122
+ datascience==0.17.6
123
+ datasets==3.6.0
124
+ db-dtypes==1.4.3
125
+ dbus-python==1.2.18
126
+ deap==1.4.3
127
+ debugpy==1.8.0
128
+ decorator==4.4.2
129
+ deepdiff==8.5.0
130
+ defusedxml==0.7.1
131
+ Deprecated==1.2.18
132
+ diffusers==0.34.0
133
+ dill==0.3.8
134
+ dipy==1.11.0
135
+ distributed==2024.12.1
136
+ distributed-ucxx-cu12==0.42.0
137
+ distro==1.9.0
138
+ dlib==19.24.6
139
+ dm-tree==0.1.9
140
+ dnspython==2.7.0
141
+ docker==7.1.0
142
+ docopt==0.6.2
143
+ docstring-to-markdown==0.17
144
+ docstring_parser==0.16
145
+ docutils==0.21.2
146
+ dopamine_rl==4.1.2
147
+ duckdb==1.2.2
148
+ earthengine-api==1.5.21
149
+ easydict==1.13
150
+ easyocr==1.7.2
151
+ editdistance==0.8.1
152
+ eerepr==0.1.2
153
+ einops==0.8.1
154
+ eli5==0.13.0
155
+ email_validator==2.2.0
156
+ emoji==2.14.1
157
+ en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
158
+ entrypoints==0.4
159
+ et_xmlfile==2.0.0
160
+ etils==1.12.2
161
+ etuples==0.3.9
162
+ execnb==0.1.14
163
+ Farama-Notifications==0.0.4
164
+ fastai==2.7.19
165
+ fastapi==0.115.13
166
+ fastcore==1.7.29
167
+ fastdownload==0.0.7
168
+ fastjsonschema==2.21.1
169
+ fastprogress==1.0.3
170
+ fastrlock==0.8.3
171
+ fasttext==0.9.3
172
+ featuretools==1.31.0
173
+ ffmpy==0.6.0
174
+ fiddle==0.3.0
175
+ filelock==3.18.0
176
+ fiona==1.10.1
177
+ firebase-admin==6.9.0
178
+ Flask==3.1.1
179
+ flatbuffers==25.2.10
180
+ flax==0.10.6
181
+ folium==0.19.7
182
+ fonttools==4.58.4
183
+ fqdn==1.5.1
184
+ frozendict==2.4.6
185
+ frozenlist==1.7.0
186
+ fsspec==2024.12.0
187
+ funcy==2.0
188
+ fury==0.12.0
189
+ future==1.0.0
190
+ fuzzywuzzy==0.18.0
191
+ gast==0.6.0
192
+ gatspy==0.3
193
+ gcsfs==2025.3.2
194
+ GDAL==3.8.4
195
+ gdown==5.2.0
196
+ geemap==0.35.3
197
+ gensim==4.3.3
198
+ geocoder==1.38.1
199
+ geographiclib==2.0
200
+ geojson==3.2.0
201
+ geopandas==0.14.4
202
+ geopy==2.4.1
203
+ ghapi==1.0.6
204
+ gin-config==0.5.0
205
+ gitdb==4.0.12
206
+ GitPython==3.1.44
207
+ glob2==0.7
208
+ google==2.0.3
209
+ google-ai-generativelanguage==0.6.15
210
+ google-api-core==1.34.1
211
+ google-api-python-client==2.173.0
212
+ google-auth==2.40.3
213
+ google-auth-httplib2==0.2.0
214
+ google-auth-oauthlib==1.2.2
215
+ google-cloud-aiplatform==1.99.0
216
+ google-cloud-automl==1.0.1
217
+ google-cloud-bigquery==3.25.0
218
+ google-cloud-bigquery-connection==1.18.3
219
+ google-cloud-core==2.4.3
220
+ google-cloud-dataproc==5.20.0
221
+ google-cloud-datastore==2.21.0
222
+ google-cloud-firestore==2.21.0
223
+ google-cloud-functions==1.20.4
224
+ google-cloud-iam==2.19.1
225
+ google-cloud-language==2.17.2
226
+ google-cloud-resource-manager==1.14.2
227
+ google-cloud-spanner==3.55.0
228
+ google-cloud-storage==2.19.0
229
+ google-cloud-translate==3.12.1
230
+ google-cloud-videointelligence==2.16.2
231
+ google-cloud-vision==3.10.2
232
+ google-colab @ file:///colabtools/dist/google_colab-1.0.0.tar.gz
233
+ google-crc32c==1.7.1
234
+ google-genai==1.21.1
235
+ google-generativeai==0.8.5
236
+ google-pasta==0.2.0
237
+ google-resumable-media==2.7.2
238
+ googleapis-common-protos==1.70.0
239
+ googledrivedownloader==1.1.0
240
+ gpxpy==1.6.2
241
+ gradio==5.31.0
242
+ gradio_client==1.10.1
243
+ graphviz==0.21
244
+ greenlet==3.2.3
245
+ groovy==0.1.2
246
+ grpc-google-iam-v1==0.14.2
247
+ grpc-interceptor==0.15.4
248
+ grpcio==1.73.1
249
+ grpcio-status==1.49.0rc1
250
+ grpclib==0.4.8
251
+ gspread==6.2.1
252
+ gspread-dataframe==4.0.0
253
+ gym==0.25.2
254
+ gym-notices==0.0.8
255
+ gymnasium==0.29.0
256
+ h11==0.16.0
257
+ h2==4.2.0
258
+ h2o==3.46.0.7
259
+ h5netcdf==1.6.1
260
+ h5py==3.14.0
261
+ haversine==2.9.0
262
+ hdbscan==0.8.40
263
+ hep_ml==0.8.0
264
+ hf-xet==1.1.5
265
+ hf_transfer==0.1.9
266
+ highspy==1.11.0
267
+ holidays==0.75
268
+ holoviews==1.20.2
269
+ hpack==4.1.0
270
+ html5lib==1.1
271
+ htmlmin==0.1.12
272
+ httpcore==1.0.9
273
+ httpimport==1.4.1
274
+ httplib2==0.22.0
275
+ httpx==0.28.1
276
+ huggingface-hub==0.33.1
277
+ humanize==4.12.3
278
+ hydra-core==1.3.2
279
+ hyperframe==6.1.0
280
+ hyperopt==0.2.7
281
+ ibis-framework==9.5.0
282
+ id==1.5.0
283
+ idna==3.10
284
+ igraph==0.11.9
285
+ ImageHash==4.3.1
286
+ imageio==2.37.0
287
+ imageio-ffmpeg==0.6.0
288
+ imagesize==1.4.1
289
+ imbalanced-learn==0.13.0
290
+ immutabledict==4.2.1
291
+ importlib_metadata==8.7.0
292
+ importlib_resources==6.5.2
293
+ imutils==0.5.4
294
+ in-toto-attestation==0.9.3
295
+ indic_numtowords==1.1.0
296
+ inflect==7.5.0
297
+ iniconfig==2.1.0
298
+ intel-cmplr-lib-rt==2024.2.0
299
+ intel-cmplr-lib-ur==2024.2.0
300
+ intel-openmp==2024.2.0
301
+ intervaltree==3.1.0
302
+ ipyevents==2.0.2
303
+ ipyfilechooser==0.6.0
304
+ ipykernel==6.17.1
305
+ ipyleaflet==0.20.0
306
+ ipympl==0.9.7
307
+ ipyparallel==8.8.0
308
+ ipython==7.34.0
309
+ ipython-genutils==0.2.0
310
+ ipython-sql==0.5.0
311
+ ipytree==0.2.2
312
+ ipywidgets==8.1.5
313
+ isoduration==20.11.0
314
+ isoweek==1.3.3
315
+ itsdangerous==2.2.0
316
+ Janome==0.5.0
317
+ jaraco.classes==3.4.0
318
+ jaraco.context==6.0.1
319
+ jaraco.functools==4.2.1
320
+ jax==0.5.2
321
+ jax-cuda12-pjrt==0.5.1
322
+ jax-cuda12-plugin==0.5.1
323
+ jaxlib==0.5.1
324
+ jedi==0.19.2
325
+ jeepney==0.9.0
326
+ jieba==0.42.1
327
+ Jinja2==3.1.6
328
+ jiter==0.10.0
329
+ jiwer==3.1.0
330
+ jmespath==1.0.1
331
+ joblib==1.5.1
332
+ json5==0.12.0
333
+ jsonpatch==1.33
334
+ jsonpickle==4.1.1
335
+ jsonpointer==3.0.0
336
+ jsonschema==4.24.0
337
+ jsonschema-specifications==2025.4.1
338
+ jupyter-console==6.1.0
339
+ jupyter-events==0.12.0
340
+ jupyter-leaflet==0.20.0
341
+ jupyter-lsp==1.5.1
342
+ jupyter-ydoc==0.2.5
343
+ jupyter_client==8.6.3
344
+ jupyter_core==5.8.1
345
+ jupyter_kernel_gateway @ git+https://github.com/googlecolab/kernel_gateway@b134e9945df25c2dcb98ade9129399be10788671
346
+ jupyter_server==2.12.5
347
+ jupyter_server_fileid==0.9.3
348
+ jupyter_server_terminals==0.5.3
349
+ jupyter_server_ydoc==0.8.0
350
+ jupyterlab==3.6.8
351
+ jupyterlab-lsp==3.10.2
352
+ jupyterlab_pygments==0.3.0
353
+ jupyterlab_server==2.27.3
354
+ jupyterlab_widgets==3.0.15
355
+ jupytext==1.17.2
356
+ kaggle==1.7.4.5
357
+ kaggle-environments==1.17.6
358
+ kagglehub==0.3.12
359
+ kaldi-python-io==1.2.2
360
+ keras==3.8.0
361
+ keras-core==0.1.7
362
+ keras-cv==0.9.0
363
+ keras-hub==0.18.1
364
+ keras-nlp==0.18.1
365
+ keras-tuner==1.4.7
366
+ keyring==25.6.0
367
+ keyrings.google-artifactregistry-auth==1.1.2
368
+ kiwisolver==1.4.8
369
+ kornia==0.8.1
370
+ kornia_rs==0.1.9
371
+ kt-legacy==1.0.5
372
+ langchain==0.3.26
373
+ langchain-core==0.3.66
374
+ langchain-text-splitters==0.3.8
375
+ langcodes==3.5.0
376
+ langid==1.1.6
377
+ langsmith==0.4.1
378
+ language_data==1.3.0
379
+ launchpadlib==1.10.16
380
+ lazr.restfulclient==0.14.4
381
+ lazr.uri==1.0.6
382
+ lazy_loader==0.4
383
+ learntools @ git+https://github.com/Kaggle/learntools@003526b6ef9d864326e2a099599de3380b4cc94c
384
+ Levenshtein==0.27.1
385
+ lhotse==1.31.1
386
+ libclang==18.1.1
387
+ libcst==1.8.5
388
+ libcudf-cu12==25.2.2
389
+ libcugraph-cu12==25.2.0
390
+ libcuml-cu12==25.2.1
391
+ libcuvs-cu12==25.2.1
392
+ libkvikio-cu12==25.2.1
393
+ libpysal==4.9.2
394
+ libraft-cu12==25.2.0
395
+ librosa==0.11.0
396
+ libucx-cu12==1.18.1
397
+ libucxx-cu12==0.42.0
398
+ lightgbm @ file:///tmp/lightgbm/LightGBM/dist/lightgbm-4.5.0-py3-none-linux_x86_64.whl
399
+ lightning==2.4.0
400
+ lightning-utilities==0.14.3
401
+ lilcom==1.8.1
402
+ lime==0.2.0.1
403
+ line_profiler==4.2.0
404
+ linkify-it-py==2.0.3
405
+ llvmlite==0.43.0
406
+ lml==0.2.0
407
+ locket==1.0.0
408
+ logical-unification==0.4.6
409
+ loguru==0.7.3
410
+ lxml==5.4.0
411
+ Mako==1.3.10
412
+ mamba==0.11.3
413
+ marisa-trie==1.2.1
414
+ Markdown==3.8.2
415
+ markdown-it-py==3.0.0
416
+ MarkupSafe==3.0.2
417
+ marshmallow==3.26.1
418
+ matplotlib==3.7.2
419
+ matplotlib-inline==0.1.7
420
+ matplotlib-venn==1.1.2
421
+ mdit-py-plugins==0.4.2
422
+ mdurl==0.1.2
423
+ mediapy==1.1.6
424
+ miniKanren==1.0.3
425
+ missingno==0.5.2
426
+ mistune==0.8.4
427
+ mizani==0.13.5
428
+ mkl==2025.2.0
429
+ mkl-fft==1.3.8
430
+ mkl-random==1.2.4
431
+ mkl-service==2.4.1
432
+ mkl-umath==0.1.1
433
+ ml-dtypes==0.4.1
434
+ ml_collections==1.1.0
435
+ mlcrate==0.2.0
436
+ mlxtend==0.23.4
437
+ mne==1.9.0
438
+ model-signing==1.0.1
439
+ more-itertools==10.7.0
440
+ moviepy==1.0.3
441
+ mpld3==0.5.10
442
+ mpmath==1.3.0
443
+ msgpack==1.1.1
444
+ multidict==6.6.3
445
+ multimethod==1.12
446
+ multipledispatch==1.0.0
447
+ multiprocess==0.70.16
448
+ multitasking==0.0.11
449
+ murmurhash==1.0.13
450
+ music21==9.3.0
451
+ mypy_extensions==1.1.0
452
+ namex==0.1.0
453
+ narwhals==1.44.0
454
+ natsort==8.4.0
455
+ nbclassic==1.3.1
456
+ nbclient==0.5.13
457
+ nbconvert==6.4.5
458
+ nbdev==2.3.36
459
+ nbformat==5.10.4
460
+ ndindex==1.10.0
461
+ nemo-toolkit==2.5.0
462
+ nest-asyncio==1.6.0
463
+ networkx==3.5
464
+ nibabel==5.3.2
465
+ nilearn==0.10.4
466
+ ninja==1.11.1.4
467
+ nltk==3.9.1
468
+ notebook==6.5.4
469
+ notebook_shim==0.2.4
470
+ num2words==0.5.14
471
+ numba==0.60.0
472
+ numba-cuda==0.2.0
473
+ numexpr==2.11.0
474
+ numpy==1.26.4
475
+ nvidia-cublas-cu12==12.4.5.8
476
+ nvidia-cuda-cupti-cu12==12.4.127
477
+ nvidia-cuda-nvcc-cu12==12.5.82
478
+ nvidia-cuda-nvrtc-cu12==12.4.127
479
+ nvidia-cuda-runtime-cu12==12.4.127
480
+ nvidia-cudnn-cu12==9.1.0.70
481
+ nvidia-cufft-cu12==11.2.1.3
482
+ nvidia-curand-cu12==10.3.5.147
483
+ nvidia-cusolver-cu12==11.6.1.9
484
+ nvidia-cusparse-cu12==12.3.1.170
485
+ nvidia-cusparselt-cu12==0.6.2
486
+ nvidia-ml-py==12.575.51
487
+ nvidia-nccl-cu12==2.21.5
488
+ nvidia-nvcomp-cu12==4.2.0.11
489
+ nvidia-nvjitlink-cu12==12.4.127
490
+ nvidia-nvtx-cu12==12.4.127
491
+ nvtx==0.2.12
492
+ nx-cugraph-cu12 @ https://pypi.nvidia.com/nx-cugraph-cu12/nx_cugraph_cu12-25.2.0-py3-none-any.whl
493
+ oauth2client==4.1.3
494
+ oauthlib==3.3.1
495
+ odfpy==1.4.1
496
+ olefile==0.47
497
+ omegaconf==2.3.0
498
+ onnx==1.18.0
499
+ open_spiel==1.6
500
+ openai==1.91.0
501
+ opencv-contrib-python==4.11.0.86
502
+ opencv-python==4.11.0.86
503
+ opencv-python-headless==4.11.0.86
504
+ openpyxl==3.1.5
505
+ openslide-bin==4.0.0.8
506
+ openslide-python==1.4.2
507
+ opt_einsum==3.4.0
508
+ optax==0.2.5
509
+ optree==0.16.0
510
+ optuna==4.4.0
511
+ orbax-checkpoint==0.11.16
512
+ orderly-set==5.4.1
513
+ orjson==3.10.18
514
+ osqp==1.0.4
515
+ overrides==7.7.0
516
+ packaging==24.2
517
+ pandas==2.2.3
518
+ pandas-datareader==0.10.0
519
+ pandas-gbq==0.29.1
520
+ pandas-profiling==3.6.6
521
+ pandas-stubs==2.2.2.240909
522
+ pandasql==0.7.3
523
+ pandocfilters==1.5.1
524
+ panel==1.7.1
525
+ papermill==2.6.0
526
+ param==2.2.1
527
+ parso==0.8.4
528
+ parsy==2.1
529
+ partd==1.4.2
530
+ path==17.1.0
531
+ path.py==12.5.0
532
+ pathlib==1.0.1
533
+ pathos==0.3.1
534
+ patsy==1.0.1
535
+ pdf2image==1.17.0
536
+ peewee==3.18.1
537
+ peft==0.15.2
538
+ pettingzoo==1.24.0
539
+ pexpect==4.9.0
540
+ phik==0.12.4
541
+ pickleshare==0.7.5
542
+ pillow==11.2.1
543
+ plac==1.4.5
544
+ platformdirs==4.3.8
545
+ plotly==5.24.1
546
+ plotly-express==0.4.1
547
+ plotnine==0.14.5
548
+ pluggy==1.6.0
549
+ plum-dispatch==2.5.7
550
+ ply==3.11
551
+ polars==1.21.0
552
+ pooch==1.8.2
553
+ portpicker==1.5.2
554
+ pox==0.3.6
555
+ ppft==1.7.7
556
+ preprocessing==0.1.13
557
+ preshed==3.0.10
558
+ prettytable==3.16.0
559
+ proglog==0.1.12
560
+ progressbar2==4.5.0
561
+ prometheus_client==0.22.1
562
+ promise==2.3
563
+ prompt_toolkit==3.0.51
564
+ propcache==0.3.2
565
+ prophet==1.1.7
566
+ proto-plus==1.26.1
567
+ protobuf==5.29.5
568
+ psutil==7.0.0
569
+ psycopg2==2.9.10
570
+ ptyprocess==0.7.0
571
+ pudb==2025.1
572
+ puremagic==1.29
573
+ py-cpuinfo==9.0.0
574
+ py4j==0.10.9.7
575
+ pyaml==25.5.0
576
+ pyannote.core==5.0.0
577
+ pyannote.database==5.1.3
578
+ pyannote.metrics==3.2.1
579
+ PyArabic==0.6.15
580
+ pyarrow==19.0.1
581
+ pyasn1==0.6.1
582
+ pyasn1_modules==0.4.2
583
+ pybind11==2.13.6
584
+ pycairo==1.28.0
585
+ pyclipper==1.3.0.post6
586
+ pycocotools==2.0.10
587
+ pycparser==2.22
588
+ pycryptodome==3.23.0
589
+ pycryptodomex==3.23.0
590
+ pycuda==2025.1.1
591
+ pydantic==2.11.7
592
+ pydantic_core==2.33.2
593
+ pydata-google-auth==1.9.1
594
+ pydegensac==0.1.2
595
+ pydicom==3.0.1
596
+ pydot==3.0.4
597
+ pydotplus==2.0.2
598
+ PyDrive==1.3.1
599
+ PyDrive2==1.21.3
600
+ pydub==0.25.1
601
+ pyemd==1.0.0
602
+ pyerfa==2.0.1.5
603
+ pyexcel-io==0.6.7
604
+ pyexcel-ods==0.6.0
605
+ pygame==2.6.1
606
+ pygit2==1.18.0
607
+ pygltflib==1.16.4
608
+ Pygments==2.19.2
609
+ PyGObject==3.42.0
610
+ PyJWT==2.10.1
611
+ pyLDAvis==3.4.1
612
+ pylibcudf-cu12==25.2.2
613
+ pylibcugraph-cu12==25.2.0
614
+ pylibraft-cu12==25.2.0
615
+ pyloudnorm==0.1.1
616
+ pymc==5.23.0
617
+ pymc3==3.11.4
618
+ pymongo==4.13.2
619
+ Pympler==1.1
620
+ pymystem3==0.2.0
621
+ pynndescent==0.5.13
622
+ pynvjitlink-cu12==0.5.2
623
+ pynvml==12.0.0
624
+ pyogrio==0.11.0
625
+ pyomo==6.9.2
626
+ PyOpenGL==3.1.9
627
+ pyOpenSSL==25.1.0
628
+ pyparsing==3.0.9
629
+ pypdf==5.7.0
630
+ pyperclip==1.9.0
631
+ pyproj==3.7.1
632
+ pyproject_hooks==1.2.0
633
+ pyshp==2.3.1
634
+ PySocks==1.7.1
635
+ pyspark==3.5.1
636
+ pytensor==2.31.4
637
+ pytesseract==0.3.13
638
+ pytest==8.3.5
639
+ python-apt==0.0.0
640
+ python-bidi==0.6.6
641
+ python-box==7.3.2
642
+ python-dateutil==2.9.0.post0
643
+ python-dotenv==1.1.1
644
+ python-json-logger==3.3.0
645
+ python-louvain==0.16
646
+ python-lsp-jsonrpc==1.1.2
647
+ python-lsp-server==1.12.2
648
+ python-multipart==0.0.20
649
+ python-slugify==8.0.4
650
+ python-snappy==0.7.3
651
+ python-utils==3.9.1
652
+ pytools==2025.1.7
653
+ pytorch-ignite==0.5.2
654
+ pytorch-lightning==2.5.1.post0
655
+ pytz==2025.2
656
+ PyUpSet==0.1.1.post7
657
+ pyviz_comms==3.0.6
658
+ PyWavelets==1.8.0
659
+ PyYAML==6.0.2
660
+ pyzmq==24.0.1
661
+ qgrid==1.3.1
662
+ qtconsole==5.6.1
663
+ QtPy==2.4.3
664
+ raft-dask-cu12==25.2.0
665
+ RapidFuzz==3.14.1
666
+ rapids-dask-dependency==25.2.0
667
+ ratelim==0.1.6
668
+ ray==2.47.1
669
+ referencing==0.36.2
670
+ regex==2024.11.6
671
+ requests==2.32.4
672
+ requests-oauthlib==2.0.0
673
+ requests-toolbelt==1.0.0
674
+ requirements-parser==0.9.0
675
+ resampy==0.4.3
676
+ rfc3161-client==1.0.3
677
+ rfc3339-validator==0.1.4
678
+ rfc3986-validator==0.1.1
679
+ rfc8785==0.1.4
680
+ rgf-python==3.12.0
681
+ rich==14.0.0
682
+ rmm-cu12==25.2.0
683
+ roman-numerals-py==3.1.0
684
+ rpds-py==0.25.1
685
+ rpy2==3.5.17
686
+ rsa==4.9.1
687
+ rtree==1.4.0
688
+ ruamel.yaml==0.18.15
689
+ ruamel.yaml.clib==0.2.14
690
+ ruff==0.12.0
691
+ s3fs==0.4.2
692
+ s3transfer==0.13.0
693
+ sacremoses==0.1.1
694
+ safehttpx==0.1.6
695
+ safetensors==0.5.3
696
+ scikit-image==0.25.2
697
+ scikit-learn==1.2.2
698
+ scikit-learn-intelex==2025.6.1
699
+ scikit-multilearn==0.2.0
700
+ scikit-optimize==0.10.2
701
+ scikit-plot==0.3.7
702
+ scikit-surprise==1.1.4
703
+ scipy==1.15.2
704
+ scooby==0.10.1
705
+ scs==3.2.7.post2
706
+ seaborn==0.12.2
707
+ SecretStorage==3.3.3
708
+ securesystemslib==1.3.0
709
+ segment_anything @ git+https://github.com/facebookresearch/segment-anything.git@dca509fe793f601edb92606367a655c15ac00fdf
710
+ semantic-version==2.10.0
711
+ semver==3.0.4
712
+ Send2Trash==1.8.3
713
+ sentence-transformers==4.1.0
714
+ sentencepiece==0.2.0
715
+ sentry-sdk==2.31.0
716
+ setproctitle==1.3.6
717
+ setuptools-scm==8.3.1
718
+ shap==0.44.1
719
+ shapely==2.1.1
720
+ shellingham==1.5.4
721
+ Shimmy==1.3.0
722
+ sigstore==3.6.4
723
+ sigstore-protobuf-specs==0.3.2
724
+ sigstore-rekor-types==0.0.18
725
+ simple-parsing==0.1.7
726
+ simpleitk==2.5.2
727
+ simplejson==3.20.1
728
+ simsimd==6.4.9
729
+ siphash24==1.7
730
+ six==1.17.0
731
+ sklearn-compat==0.1.3
732
+ sklearn-pandas==2.2.0
733
+ slicer==0.0.7
734
+ smart-open==7.1.0
735
+ smmap==5.0.2
736
+ sniffio==1.3.1
737
+ snowballstemmer==3.0.1
738
+ sortedcontainers==2.4.0
739
+ soundfile==0.13.1
740
+ soupsieve==2.7
741
+ sox==1.5.0
742
+ soxr==0.5.0.post1
743
+ spacy==3.8.7
744
+ spacy-legacy==3.0.12
745
+ spacy-loggers==1.0.5
746
+ spanner-graph-notebook==1.1.7
747
+ Sphinx==8.2.3
748
+ sphinx-rtd-theme==0.2.4
749
+ sphinxcontrib-applehelp==2.0.0
750
+ sphinxcontrib-devhelp==2.0.0
751
+ sphinxcontrib-htmlhelp==2.1.0
752
+ sphinxcontrib-jsmath==1.0.1
753
+ sphinxcontrib-qthelp==2.0.0
754
+ sphinxcontrib-serializinghtml==2.0.0
755
+ SQLAlchemy==2.0.41
756
+ sqlglot==25.20.2
757
+ sqlparse==0.5.3
758
+ squarify==0.4.4
759
+ srsly==2.5.1
760
+ stable-baselines3==2.1.0
761
+ stanio==0.5.1
762
+ starlette==0.46.2
763
+ statsmodels==0.14.4
764
+ stopit==1.1.2
765
+ stringzilla==3.12.5
766
+ stumpy==1.13.0
767
+ sympy==1.13.1
768
+ tables==3.10.2
769
+ tabulate==0.9.0
770
+ tbb==2022.2.0
771
+ tbb4py==2022.2.0
772
+ tblib==3.1.0
773
+ tcmlib==1.4.0
774
+ tenacity==8.5.0
775
+ tensorboard==2.18.0
776
+ tensorboard-data-server==0.7.2
777
+ tensorflow==2.18.0
778
+ tensorflow-cloud==0.1.5
779
+ tensorflow-datasets==4.9.9
780
+ tensorflow-hub==0.16.1
781
+ tensorflow-io==0.37.1
782
+ tensorflow-io-gcs-filesystem==0.37.1
783
+ tensorflow-metadata==1.17.2
784
+ tensorflow-probability==0.25.0
785
+ tensorflow-text==2.18.1
786
+ tensorflow_decision_forests==1.11.0
787
+ tensorstore==0.1.74
788
+ termcolor==3.1.0
789
+ terminado==0.18.1
790
+ testpath==0.6.0
791
+ text-unidecode==1.3
792
+ textblob==0.19.0
793
+ texterrors==0.5.1
794
+ texttable==1.7.0
795
+ tf-slim==1.1.0
796
+ tf_keras==2.18.0
797
+ Theano==1.0.5
798
+ Theano-PyMC==1.1.2
799
+ thinc==8.3.6
800
+ threadpoolctl==3.6.0
801
+ tifffile==2025.6.11
802
+ tiktoken==0.9.0
803
+ timm==1.0.15
804
+ tinycss2==1.4.0
805
+ tokenizers==0.21.2
806
+ toml==0.10.2
807
+ tomlkit==0.13.3
808
+ toolz==1.0.0
809
+ torch @ https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
810
+ torchao==0.10.0
811
+ torchaudio @ https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
812
+ torchdata==0.11.0
813
+ torchinfo==1.8.0
814
+ torchmetrics==1.7.3
815
+ torchsummary==1.5.1
816
+ torchtune==0.6.1
817
+ torchvision @ https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp311-cp311-linux_x86_64.whl
818
+ tornado==6.5.1
819
+ TPOT==0.12.1
820
+ tqdm==4.67.1
821
+ traitlets==5.7.1
822
+ traittypes==0.2.1
823
+ transformers==4.53.3
824
+ treelite==4.4.1
825
+ treescope==0.1.9
826
+ triton==3.2.0
827
+ trx-python==0.3
828
+ tsfresh==0.21.0
829
+ tuf==6.0.0
830
+ tweepy==4.15.0
831
+ typeguard==4.4.4
832
+ typer==0.16.0
833
+ types-python-dateutil==2.9.0.20250516
834
+ types-pytz==2025.2.0.20250516
835
+ types-setuptools==80.9.0.20250529
836
+ typing-inspect==0.9.0
837
+ typing-inspection==0.4.1
838
+ typing_extensions==4.14.0
839
+ tzdata==2025.2
840
+ tzlocal==5.3.1
841
+ uc-micro-py==1.0.3
842
+ ucx-py-cu12==0.42.0
843
+ ucxx-cu12==0.42.0
844
+ ujson==5.10.0
845
+ umap-learn==0.5.7
846
+ umf==0.11.0
847
+ update-checker==0.18.0
848
+ uri-template==1.3.0
849
+ uritemplate==4.2.0
850
+ urllib3==2.5.0
851
+ urwid==3.0.2
852
+ urwid_readline==0.15.1
853
+ uvicorn==0.34.3
854
+ vega-datasets==0.9.0
855
+ visions==0.8.1
856
+ vtk==9.3.1
857
+ wadllib==1.3.6
858
+ Wand==0.6.13
859
+ wandb==0.20.1
860
+ wasabi==1.1.3
861
+ watchdog==6.0.0
862
+ wavio==0.0.9
863
+ wcwidth==0.2.13
864
+ weasel==0.4.1
865
+ webcolors==24.11.1
866
+ webdataset==1.0.2
867
+ webencodings==0.5.1
868
+ websocket-client==1.8.0
869
+ websockets==15.0.1
870
+ Werkzeug==3.1.3
871
+ wget==3.2
872
+ whisper_normalizer==0.1.12
873
+ widgetsnbextension==4.0.14
874
+ woodwork==0.31.0
875
+ wordcloud==1.9.4
876
+ wrapt==1.17.2
877
+ wurlitzer==3.1.1
878
+ xarray==2025.3.1
879
+ xarray-einstats==0.9.1
880
+ xgboost==2.0.3
881
+ xlrd==2.0.2
882
+ xvfbwrapper==0.2.13
883
+ xxhash==3.5.0
884
+ xyzservices==2025.4.0
885
+ y-py==0.6.2
886
+ yarl==1.20.1
887
+ ydata-profiling==4.16.1
888
+ ydf==0.9.0
889
+ yellowbrick==1.5
890
+ yfinance==0.2.63
891
+ ypy-websocket==0.8.4
892
+ zict==3.0.0
893
+ zipp==3.23.0
894
+ zstandard==0.23.0
895
+
maintab_fast_conformer_10-09_01-54/model_avg.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25cafd383db36a0454a708f0b0555a5eb6afcb23e96f3adc5692787482c5c357
3
+ size 111337722
maintab_fast_conformer_10-09_01-54/model_mp.txt ADDED
@@ -0,0 +1,15 @@
1
+ | Name | Type | Params | Mode
2
+ --------------------------------------------------------------------------------
3
+ 0 | preprocessor | AudioToMelSpectrogramPreprocessor | 0 | train
4
+ 1 | encoder | ConformerEncoder | 26.1 M | train
5
+ 2 | decoder | ConvASRDecoder | 1.6 M | train
6
+ 3 | loss | CTCLoss | 0 | train
7
+ 4 | spec_augmentation | SpectrogramAugmentation | 0 | train
8
+ 5 | wer | WER | 0 | train
9
+ --------------------------------------------------------------------------------
10
+ 27.8 M Trainable params
11
+ 0 Non-trainable params
12
+ 27.8 M Total params
13
+ 111.012 Total estimated model params size (MB)
14
+ 471 Modules in train mode
15
+ 0 Modules in eval mode
maintab_fast_conformer_10-09_01-54/nemo_model_avg.nemo ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30ac7e57a46f8d38407a755f6c1b28b6414914374dbde26538d025677016842c
3
+ size 111636480
maintab_fast_conformer_10-09_01-54/training_process_100.png ADDED
maintab_fast_conformer_10-09_01-54/training_process_112.png ADDED
maintab_fast_conformer_10-09_01-54/training_process_42.png ADDED