Commit cffeb75 (verified) by QuanHoangNgoc · 1 parent: e30f601

Upload new exp dir of maintab_fast_conformer

Files changed (36)
  1. .gitattributes +4 -0
  2. maintab_fast_conformer_10-09_01-54/25808631.csv +0 -0
  3. maintab_fast_conformer_10-09_01-54/40250165.csv +0 -0
  4. maintab_fast_conformer_10-09_01-54/60227680.csv +0 -0
  5. maintab_fast_conformer_10-09_01-54/_dev_mf.json +0 -0
  6. maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo +3 -0
  7. maintab_fast_conformer_10-09_01-54/_nemo_model_part_1.nemo +3 -0
  8. maintab_fast_conformer_10-09_01-54/_train_mf.json +3 -0
  9. maintab_fast_conformer_10-09_01-54/args_trainer.txt +1 -0
  10. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=0-valwerval_wer=0.9991.ckpt +3 -0
  11. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=1-valwerval_wer=0.9954.ckpt +3 -0
  12. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=108-valwerval_wer=0.4084.ckpt +3 -0
  13. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=109-valwerval_wer=0.4073.ckpt +3 -0
  14. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=110-valwerval_wer=0.4045.ckpt +3 -0
  15. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=111-valwerval_wer=0.4008.ckpt +3 -0
  16. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=112-valwerval_wer=0.3828.ckpt +3 -0
  17. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=2-valwerval_wer=0.9649.ckpt +3 -0
  18. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=28-valwerval_wer=1.1402.ckpt +3 -0
  19. maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=31-valwerval_wer=1.1436.ckpt +3 -0
  20. maintab_fast_conformer_10-09_01-54/code-folder/Demo/demo.yaml +269 -0
  21. maintab_fast_conformer_10-09_01-54/code-folder/Demo/train.py +270 -0
  22. maintab_fast_conformer_10-09_01-54/code-folder/Demo/utils.py +269 -0
  23. maintab_fast_conformer_10-09_01-54/code-folder/__pycache__/train.cpython-311.pyc +0 -0
  24. maintab_fast_conformer_10-09_01-54/code-folder/configs/fast_conformer.yaml +271 -0
  25. maintab_fast_conformer_10-09_01-54/code-folder/train.py +315 -0
  26. maintab_fast_conformer_10-09_01-54/code-folder/utils/__pycache__/utils.cpython-311.pyc +0 -0
  27. maintab_fast_conformer_10-09_01-54/code-folder/utils/install_cmd.txt +22 -0
  28. maintab_fast_conformer_10-09_01-54/code-folder/utils/utils.py +283 -0
  29. maintab_fast_conformer_10-09_01-54/conf_model.txt +1 -0
  30. maintab_fast_conformer_10-09_01-54/git_pip_env.txt +895 -0
  31. maintab_fast_conformer_10-09_01-54/model_avg.ckpt +3 -0
  32. maintab_fast_conformer_10-09_01-54/model_mp.txt +15 -0
  33. maintab_fast_conformer_10-09_01-54/nemo_model_avg.nemo +3 -0
  34. maintab_fast_conformer_10-09_01-54/training_process_100.png +0 -0
  35. maintab_fast_conformer_10-09_01-54/training_process_112.png +0 -0
  36. maintab_fast_conformer_10-09_01-54/training_process_42.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/_nemo_model_part_1.nemo filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/_train_mf.json filter=lfs diff=lfs merge=lfs -text
+ maintab_fast_conformer_10-09_01-54/nemo_model_avg.nemo filter=lfs diff=lfs merge=lfs -text
maintab_fast_conformer_10-09_01-54/25808631.csv ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/40250165.csv ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/60227680.csv ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/_dev_mf.json ADDED
The diff for this file is too large to render. See raw diff
 
maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b647ea527cd25659039983a67633bf136e64405dd93c52fbbfd30511e79d7b6
+ size 111626240
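
The `.nemo`, `.json`, and `.ckpt` entries in this commit are Git LFS pointer files rather than the binaries themselves: each records the spec version, the SHA-256 object id, and the byte size of the real file. A minimal sketch of reading such a pointer from a checkout where it has not been smudged — the parse_lfs_pointer helper and the example usage are hypothetical, not part of this repo:

def parse_lfs_pointer(path: str) -> dict:
    # Each line of an LFS pointer is "<key> <value>",
    # e.g. "oid sha256:0b64...", "size 111626240".
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# Hypothetical usage:
# info = parse_lfs_pointer("maintab_fast_conformer_10-09_01-54/_nemo_model_part_0.nemo")
# print(info["oid"], int(info["size"]))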
maintab_fast_conformer_10-09_01-54/_nemo_model_part_1.nemo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7cef41435f6994b0ff1ac51f506d585959c92f5340d2708da2217e37a5c624be
+ size 111626240
maintab_fast_conformer_10-09_01-54/_train_mf.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:198327931cefe7cada1e1ae27d9b594456407377d82c75f9f65e8d7f4d3d90b4
+ size 15057811
maintab_fast_conformer_10-09_01-54/args_trainer.txt ADDED
@@ -0,0 +1 @@
+ {'precision': 'bf16', 'devices': 1, 'num_nodes': 1, 'accelerator': 'gpu', 'strategy': 'auto', 'max_epochs': 1000, 'accumulate_grad_batches': 1, 'gradient_clip_val': 0.0, 'log_every_n_steps': 100, 'val_check_interval': 1.0, 'enable_progress_bar': False, 'num_sanity_val_steps': 0, 'check_val_every_n_epoch': 1, 'sync_batchnorm': True, 'benchmark': False, 'enable_checkpointing': True, 'max_time': '00:09:05:00', 'callbacks': [<V2_Run.Fast_conformer_nemo.utils.utils.LossLogger object at 0x7e2c6bc4fb90>, <lightning.pytorch.callbacks.early_stopping.EarlyStopping object at 0x7e2c6b6b3110>, <lightning.pytorch.callbacks.model_checkpoint.ModelCheckpoint object at 0x7e2c6b87f350>]}
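
args_trainer.txt stores the repr of the keyword arguments that utils.py (below) passes to the Lightning Trainer; the callback entries print as Python object reprs and are not reconstructible from this file. A minimal sketch of how such a dict is consumed, with the callbacks list omitted:

import lightning.pytorch as pl

# Minimal sketch, reusing the values recorded in args_trainer.txt;
# the callbacks entry (LossLogger, EarlyStopping, ModelCheckpoint) is omitted.
trainer = pl.Trainer(
    precision="bf16", devices=1, num_nodes=1, accelerator="gpu",
    strategy="auto", max_epochs=1000, accumulate_grad_batches=1,
    gradient_clip_val=0.0, log_every_n_steps=100, val_check_interval=1.0,
    enable_progress_bar=False, num_sanity_val_steps=0,
    check_val_every_n_epoch=1, sync_batchnorm=True, benchmark=False,
    enable_checkpointing=True, max_time="00:09:05:00",
)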
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=0-valwerval_wer=0.9991.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d347b08973443ac69db26151ac2ad15b7e65019240275abe3b5da1fc96406fde
+ size 334971421
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=1-valwerval_wer=0.9954.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:714aaba5e6afc63a396eccdf6df55f8c52ef736f312208574dd63dea9ea06a64
+ size 334971804
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=108-valwerval_wer=0.4084.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3253efb67dcac3d737b62ec40b370cc865452ce76536f27cea1139b551491e2b
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=109-valwerval_wer=0.4073.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2246d82d7db8b7d2541a5567fbf5c15538b3a1f16cf2c172efbaff438aacd812
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=110-valwerval_wer=0.4045.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0092203a63c727db37f170369c048aa4a3e1d5e81dd812b82d9f9573828c93f4
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=111-valwerval_wer=0.4008.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a79902648a37db478d1273b56974b77862457cfc026963060a57de3c8b30309c
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=112-valwerval_wer=0.3828.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9da164a893141bd05dac3280e072e62035e1335ef11e94f4eb69132fe3d88a53
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=2-valwerval_wer=0.9649.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54e1bd3fe262d3d7ae16ff8090b45132c3be639f957a4e0f7b5ad0cced8f93b6
+ size 334972187
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=28-valwerval_wer=1.1402.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f389a64fdaebd40f41653e35012352d3f22f07da22fcc58b0b740d446c6b5a43
+ size 334972634
maintab_fast_conformer_10-09_01-54/ckpts/epochepoch=31-valwerval_wer=1.1436.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23d12861b5ce277e339631d5ad385d8e0988b8d241c66f272057d005c0aece2a
+ size 334972634
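
The doubled tokens in these checkpoint names (epochepoch=..., valwerval_wer=...) come from the ModelCheckpoint filename template in utils.py below: with the default auto_insert_metric_name=True, Lightning expands each {metric} placeholder to metric=value, so the literal prefix in the template and the inserted key concatenate. A minimal sketch of the callback that produces them:

from lightning.pytorch.callbacks import ModelCheckpoint

# With auto_insert_metric_name=True (the default), "{epoch}" expands to
# "epoch=<n>", so this template yields names like
# "epochepoch=112-valwerval_wer=0.3828.ckpt" as listed above.
ckpt_cb = ModelCheckpoint(
    filename="epoch{epoch}-valwer{val_wer:.4f}",
    monitor="val_wer", mode="min", save_top_k=5,
)
# Passing auto_insert_metric_name=False would instead give
# "epoch112-valwer0.3828.ckpt".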
maintab_fast_conformer_10-09_01-54/code-folder/Demo/demo.yaml ADDED
@@ -0,0 +1,269 @@
+ # It contains the default values for training a Fast Conformer-CTC ASR model, large size (~120M), with CTC loss and sub-word encoding.
+
+ # You may find more info about FastConformer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#fast-conformer
+
+ # We suggest using trainer.precision=bf16 for GPUs which support it; otherwise trainer.precision=16 is recommended.
+ # Using bf16 or 16 makes it possible to double the batch size and speed up training/inference. If fp16 is not stable and the model diverges after some epochs, you may use fp32.
+ # Here are the suggested batch sizes per GPU for each precision and memory size:
+ #! fp16 >> 32 --> batch_size = 16
+ # +-----------+------------+------------+
+ # | Precision | GPU Memory | Batch Size |
+ # +===========+============+============+
+ # | 32        | 16GB       | 16         |
+ # |           | 32GB       | 32         |
+ # |           | 80GB       | 64         |
+ # +-----------+------------+------------+
+ # | fp16 or   | 16GB       | 32         |
+ # | bf16      | 32GB       | 64         |
+ # |           | 80GB       | 128        |
+ # +-----------+------------+------------+
+ # Here are the recommended configs for different variants of FastConformer-CTC-BPE; other parameters are the same as in this config file.
+ #
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Model          | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling |
+ # +================+=========+=========+==========+==================+==============+==========================+=================+==========+
+ # | Small (14M)    | 176     | 4       | 16       | 9                | 0.0          | 320                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Medium (32M)   | 256     | 4       | 16       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Large (120M)   | 512     | 8       | 17       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XLarge (616M)  | 1024    | 8       | 24       | 9                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XXLarge (1.2B) | 1024    | 8       | 42       | 5                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+
+ # Note: these are based on the assumption of a max_duration of 20. If you have a longer or shorter max_duration, then batch sizes may need to be updated accordingly.
+
+ # Default learning parameters in this config are set for a global batch size of 2K, while you may use lower values.
+ # To increase the global batch size with a limited number of GPUs, you may use a higher accumulate_grad_batches.
+ # However, accumulate_grad_batches is better avoided as long as the global batch size is large enough and training is stable.
+
+ name: "FastConformer-CTC-BPE"
+ train_path: "output/annot/train_ds.json"
+ dev_path: "output/annot/dev_300.json"
+ test_path: "output/annot/test_ds.json"
+ vocab_dir: "output/annot/" #* correct
+ full_val_path: "output/annot/dev_ds.json"
+
+ train_ds_batch_size: 16
+ gen_ds_batch_size: 16
+ ds_max_duration: 32.0 #! Will be replaced
+ aug_time_masks: 2 # Changed from 10 for faster, smoother optimization
+
+ # Model parameters
+ d_model: 256
+ n_heads: 4
+ n_layers: 16
+ conv_kernel_size: 9
+ xscaling: true
+ # Optimize
+ lr: 1e-3 #! Select lr and decay = 0.0, sched, warm_step
+ weight_decay: 1e-3
+ warmup_steps: 15000
+
+ # Training parameters # Don't use
+ num_epochs: 1000
+ precision: 32
+ accumulate_grad_batches: 1
+
+ model:
+   sample_rate: 16000
+   log_prediction: false #! (changed from true to false) enables logging sample predictions in the output during training
+   ctc_reduction: "mean_volume"
+   skip_nan_grad: false
+
+   train_ds:
+     manifest_filepath: ${train_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${train_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: true
+     num_workers: 8
+     pin_memory: true
+     max_duration: ${ds_max_duration} # it is set for LibriSpeech, you may need to update it for your dataset
+     min_duration: 0.1
+     # tarred datasets
+     is_tarred: false
+     tarred_audio_filepaths: null
+     shuffle_n: 2048
+     # bucketing params
+     bucketing_strategy: "fully_randomized"
+     bucketing_batch_size: null
+
+   validation_ds:
+     manifest_filepath: ${dev_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   test_ds:
+     manifest_filepath: ${test_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   # recommend a vocab size of 128 or 256 when training on ~1k hr datasets and a 1k vocab size on 10+k hr datasets
+   # you may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py
+   tokenizer:
+     dir: ${vocab_dir} # path to directory which contains either tokenizer.model (bpe) or vocab.txt (wpe)
+     type: wpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)
+
+   preprocessor:
+     _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
+     sample_rate: ${model.sample_rate}
+     normalize: "per_feature"
+     window_size: 0.025
+     window_stride: 0.01
+     window: "hann"
+     features: 80
+     n_fft: 512
+     log: true
+     frame_splicing: 1
+     dither: 0.00001
+     pad_to: 0
+     pad_value: 0.0
+
+   spec_augment:
+     _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+     freq_masks: 2 # set to zero to disable it
+     # you may use lower time_masks for smaller models to have faster convergence
+     time_masks: ${aug_time_masks} # set to zero to disable it, from 10
+     freq_width: 27
+     time_width: 0.05
+
+   encoder:
+     _target_: nemo.collections.asr.modules.ConformerEncoder
+     feat_in: ${model.preprocessor.features}
+     feat_out: -1 # you may set it if you need a different output size than the default d_model
+     n_layers: ${n_layers}
+     d_model: ${d_model}
+
+     # Sub-sampling params
+     subsampling: dw_striding # vggnet, striding, stacking or stacking_norm, dw_striding
+     subsampling_factor: 8 # must be a power of 2 for striding and vggnet
+     subsampling_conv_channels: 256 # -1 sets it to d_model = 256
+     causal_downsampling: false
+
+     # Feed forward module's params
+     ff_expansion_factor: 4
+
+     # Multi-headed Attention Module's params
+     self_attention_model: rel_pos # rel_pos or abs_pos
+     n_heads: ${n_heads} # may need to be lower for smaller d_models
+     # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention
+     att_context_size: [-1, -1] # -1 means unlimited context
+     att_context_style: regular # regular or chunked_limited
+     xscaling: ${xscaling} # scales up the input embeddings by sqrt(d_model)
+     untie_biases: true # unties the biases of the TransformerXL layers
+     pos_emb_max_len: 5000
+     use_pytorch_sdpa: false #! use torch sdpa instead of manual attention
+     use_pytorch_sdpa_backends: [] # empty list means all backends https://pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html e.g. [MATH]
+
+     # Convolution module's params
+     conv_kernel_size: ${conv_kernel_size}
+     conv_norm_type: "batch_norm" # batch_norm or layer_norm or groupnormN (N specifies the number of groups)
+     # conv_context_size can be "causal" or a list of two integers such that conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size
+     # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0]
+     conv_context_size: null
+
+     ### regularization
+     dropout: 0.1 # The dropout used in most of the Conformer Modules
+     dropout_pre_encoder: 0.1 # The dropout used before the encoder
+     dropout_emb: 0.0 # The dropout used for embeddings
+     dropout_att: 0.1 # The dropout for multi-headed attention modules
+
+     # set to non-zero to enable stochastic depth
+     stochastic_depth_drop_prob: 0.0
+     stochastic_depth_mode: linear # linear or uniform
+     stochastic_depth_start_layer: 1
+
+   decoder:
+     _target_: nemo.collections.asr.modules.ConvASRDecoder
+     feat_in: null
+     num_classes: -1
+     vocabulary: []
+
+   # config for InterCTC loss: https://arxiv.org/abs/2102.03216
+   # specify loss weights and which layers to use for InterCTC
+   # e.g., to reproduce the paper results, set loss_weights: [0.3]
+   # and apply_at_layers: [8] (assuming 18 layers). Note that the final
+   # layer loss coefficient is automatically adjusted (to 0.7 in the above example)
+   interctc:
+     loss_weights: []
+     apply_at_layers: []
+
+   optim:
+     name: adamw
+     # lr: 1e-3 #! Select lr and decay, sched, warm_step
+     lr: ${lr}
+     # optimizer arguments
+     betas: [0.9, 0.98]
+     # less need for weight_decay as we already have large augmentations with SpecAug
+     # you may need weight_decay for large models, stable AMP training, small datasets, or when lower augmentations are used
+     # weight decay of 0.0 with lr of 2.0 also works fine
+     weight_decay: ${weight_decay}
+
+     # scheduler setup
+     sched:
+       name: CosineAnnealing
+       # scheduler config override
+       # warmup_steps: 15000
+       warmup_steps: ${warmup_steps}
+       warmup_ratio: null
+       min_lr: 1e-4
+
+ # ==============================================================================
+ # Don't use
+
+ trainer:
+   devices: -1 # number of GPUs, -1 would use all available GPUs
+   num_nodes: 1
+   max_epochs: ${num_epochs}
+   max_steps: -1 # computed at runtime if not set
+   val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for a number of iterations
+   accelerator: auto
+   strategy:
+     _target_: lightning.pytorch.strategies.DDPStrategy
+     gradient_as_bucket_view: true
+   accumulate_grad_batches: ${accumulate_grad_batches}
+   gradient_clip_val: 0.0
+   precision: ${precision} # 16, 32, or bf16
+   log_every_n_steps: 10 # Interval of logging.
+   enable_progress_bar: True
+   num_sanity_val_steps: 0 # number of validation steps to run as a sanity check before training; setting it to 0 disables it
+   check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs
+   sync_batchnorm: true
+   enable_checkpointing: False # Provided by exp_manager
+   logger: false # Provided by exp_manager
+   benchmark: false # needs to be false for models with variable-length speech input as it slows down training
+
+ # ==============================================================================
+
+ exp_manager:
+   exp_dir: null
+   name: ${name}
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   checkpoint_callback_params:
+     # in case of multiple validation sets, the first one is used
+     monitor: "val_wer"
+     mode: "min"
+     save_top_k: 5
+     always_save_nemo: True # saves the checkpoints as .nemo files instead of PTL checkpoints
+
+   resume_from_checkpoint: null # The path to a checkpoint file to continue the training; restores the whole state including the epoch, step, LR schedulers, apex, etc.
+   # you need to set these two to True to continue the training
+   resume_if_exists: false
+   resume_ignore_no_checkpoint: false
+
+   # You may use this section to create a W&B logger
+   create_wandb_logger: false
+   wandb_logger_kwargs:
+     name: null
+     project: null
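
A minimal sketch of how this config is consumed, mirroring get_config_nemo and init_nemo_model in the train.py below (assumes NeMo is installed and that the manifest and tokenizer paths in the YAML exist on disk):

import nemo.collections.asr as nemo_asr
from omegaconf import OmegaConf

# Load the YAML, resolve ${...} interpolations, and build the CTC-BPE model
# from its `model` section, as train.py does.
cfg = OmegaConf.load("maintab_fast_conformer_10-09_01-54/code-folder/Demo/demo.yaml")
params = OmegaConf.to_container(cfg, resolve=True)["model"]
conf_model = OmegaConf.create(params)
nemo_model = nemo_asr.models.EncDecCTCModelBPE(cfg=conf_model)  # a trainer can be attached later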
maintab_fast_conformer_10-09_01-54/code-folder/Demo/train.py ADDED
@@ -0,0 +1,270 @@
+ import os
+ import subprocess
+ import sys
+
+ from sklearn.model_selection import train_test_split
+
+ # Have first: V0 -> this
+ if True:
+     sys.path.append(os.getcwd())
+ if True:
+     from V0_Import.import_src import *
+ if True:
+     from Fast_conformer_nemo.utils.utils import *
+     run_import_src = True
+
+
+ # ==============================================================================
+ # Util functions
+
+
+ def set_all_seeds(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+
+ def get_config_nemo():
+     # config_path = "Fast_conformer_nemo/configs/fast_conformer.yaml"
+     # if len(sys.argv) >= 2:
+     #     config_path = sys.argv[1]
+     print(config_path, flush=True)
+     with open(config_path, "r", encoding="utf-8") as f:
+         yaml_text = f.read().strip()
+
+     _config = OmegaConf.create(yaml_text)
+     config_dict = OmegaConf.to_container(_config, resolve=True)
+     print(f"\n{config_dict['model']}\n{config_dict}", flush=True)
+     return config_dict
+
+
+ def get_train_dev_test(path):  # Train is full, dev is 300, test is full
+     data = []
+     print(path, flush=True)
+     with open(path, "r", encoding="utf-8") as f:
+         for line in f:
+             if line.strip():
+                 dic = json.loads(line)
+                 data.append(dic)
+     return data
+
+
+ def get_info(train_path):
+     data = []
+     max_dur = 0.0
+     words = []
+     with open(train_path, "r", encoding="utf-8") as f:  # Same code as above
+         for line in f:
+             if line.strip():
+                 dic = json.loads(line)
+                 data.append(dic)
+                 max_dur = max(max_dur, dic["duration"])
+                 words.extend(str(dic["text"]).split())
+
+     words = list(set(sorted(words)))
+     return data, max_dur, len(words)
+
+
+ # ==============================================================================
+ # Set global vars and data
+ if run_import_src:
+     set_all_seeds(42)  # Set random seed for reproducibility
+     config_dict = get_config_nemo()  # Get from path
+     data, max_dur, len_vocab = get_info(config_dict['train_path'])
+     sample_rate = 16000
+     dev_data, test_data = get_train_dev_test(
+         config_dict['dev_path']), get_train_dev_test(config_dict['test_path'])
+     full_val_data = get_train_dev_test(config_dict['full_val_path'])
+
+     log.info("Overall checking:")
+     log.info(f"Number of samples in manifest: {len(data)}")
+     log.info(
+         f"- Max duration in manifest: {max_dur:.2f} seconds, sample rate: {sample_rate}")
+     log.info(f"- Vocab in manifest: {len_vocab} units")
+     log.info(f"Number of dev is {len(dev_data)}, test is {len(test_data)}")
+
+
+ # ==============================================================================
+ # Create params, conf_model, mfpath and change settings
+ if run_import_src:
+     params = copy.deepcopy(config_dict)['model']
+     train_mfpath = os.path.join(res_exp_dir, "_train_mf.json")
+     dev_mfpath = os.path.join(res_exp_dir, "_dev_mf.json")
+
+     for ds in ['train_ds', 'validation_ds']:  # Change train and dev temporarily
+         if ds == 'train_ds':
+             params[ds]['manifest_filepath'] = train_mfpath
+             params[ds]['max_duration'] = round(max_dur + 0.1, 2)
+         else:
+             params[ds]['manifest_filepath'] = dev_mfpath
+     log.info(
+         f"Changed params['train_ds']['max_duration'] = {round(max_dur + 0.1, 2)} seconds \nand {train_mfpath}, {dev_mfpath} to write data!")
+
+     # Create an OmegaConf object from the dictionary
+     conf_model = OmegaConf.create(params)
+     write_txt_exp_dir("conf_model.txt", conf_model)
+     # Run pip freeze and capture the output as a string
+     pip_freeze_str = subprocess.check_output(["pip", "freeze"], text=True)
+     write_txt_exp_dir("pip_env.txt", pip_freeze_str)
+
+
+ # ==============================================================================
+ # Create a subset from a part and write it to mfpath
+
+
+ def create_train_dev(root_train_data: list, begin_idx, end_idx, num_train, num_dev, root_dev_data=None):
+     def get_min_max_duration(data):
+         mi, ma = 100.0, 0.0
+         for dic in data:
+             mi = min(mi, dic["duration"])
+             ma = max(ma, dic["duration"])
+         return mi, ma
+
+     def split_data(data: list, k):
+         if k >= len(data):
+             return data
+         _train_data, _test_data = train_test_split(
+             data, test_size=k, random_state=42
+         )
+         return _test_data
+
+     # # Sort by distribution
+     # data = list(sorted(root_train_data, key=lambda x: int(
+     #     os.path.basename(x["audio_filepath"]).split("_")[0])))
+
+     # Select the part
+     data = root_train_data.copy()
+     begin_idx = max(0, begin_idx)
+     end_idx = min(end_idx, len(data))
+     data = data[begin_idx:end_idx]
+     log.info(f"- Duration of this part: [{get_min_max_duration(data)}]")
+
+     # Select a random subset from the part: train from data, and dev from train or from root_dev
+     num_train = min(num_train, len(data))
+     train_data = split_data(data, num_train)
+     if root_dev_data is None:
+         num_dev = min(num_dev, num_train)
+         dev_data = split_data(train_data, num_dev)
+     else:
+         dev_data = list(root_dev_data).copy()
+
+     # Write the subset data back to a new file (or overwrite)
+     dev_in_train = root_dev_data is None
+     log.info(
+         f"- Number of train is {len(train_data)}, dev is {len(dev_data)}, dev in train: {dev_in_train}")
+     log.info(f"\n{train_data[0]}\n{dev_data[0]}\n")
+
+     with open(train_mfpath, "w", encoding="utf-8") as fout:
+         for item in train_data:
+             fout.write(json.dumps(item, ensure_ascii=False) + "\n")
+
+     with open(dev_mfpath, "w", encoding="utf-8") as fout:
+         for item in dev_data:
+             fout.write(json.dumps(item, ensure_ascii=False) + "\n")
+
+
+ # ==============================================================================
+ # Demo NeMo model, and demo first
+ def init_nemo_model(data, dev_data, conf_model):
+     log.info(f"\n\nInit nemo model:")
+     create_train_dev(root_train_data=data, begin_idx=0, end_idx=len(data), num_train=len(
+         data), num_dev=-1, root_dev_data=dev_data)  # Demo create data, v
+     trainer = create_new_trainer(epochs=1000, min_stop=0.0)  # Demo trainer, v
+
+     # trainer.fit(nemo_model)
+     nemo_model = nemo_asr.models.EncDecCTCModelBPE(
+         cfg=conf_model, trainer=trainer)
+     summary = ModelSummary(nemo_model)
+     print(summary)
+     return nemo_model
+
+
+ # ==============================================================================
+ # Train model: multi-part training
+
+
+ def train_multi_turn(train_data, dev_data):
+     # Init the nemo model
+     set_all_seeds(42)
+     global params, conf_model, res_exp_dir
+     nemo_model = init_nemo_model(
+         data=train_data, dev_data=dev_data, conf_model=conf_model)
+
+     # ! NOTE: Settings for multi-part training
+     train_data = list(sorted(train_data, key=lambda x: x["duration"]))
+
+     cnt = len(train_data)
+     setting = {
+         "begin_idx": [0, 0],
+         "end_idx": [5000, cnt],
+         "num_train": [1000, cnt],
+         "num_dev": [100, -1],
+         "epochs": [100, 1000],  # ! Not run for enough epochs
+         "min_stop": [0.4, 0.0]
+     }
+     num_part = len(setting["begin_idx"])
+     trainer = None
+
+     for i in range(num_part):
+         begin_idx = setting["begin_idx"][i]
+         end_idx = setting["end_idx"][i]
+         num_train = setting["num_train"][i]
+         num_dev = setting["num_dev"][i]
+         num_epochs = setting["epochs"][i]
+         min_stop = setting["min_stop"][i]
+         log.info(
+             f"\n\n Here {i}: {begin_idx} --> {end_idx} | {num_train}, {num_dev}, {cnt} | {num_epochs}, {min_stop}")
+
+         # Create the train/dev files
+         if num_dev <= 0:
+             create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
+                              num_train=num_train, num_dev=num_dev, root_dev_data=dev_data)
+         else:
+             create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
+                              num_train=num_train, num_dev=num_dev)
+
+         # Create a fresh trainer
+         trainer = create_new_trainer(
+             epochs=num_epochs, min_stop=min_stop)
+
+         nemo_model.setup_training_data(
+             train_data_config=params['train_ds'])  # Reload it
+         nemo_model.setup_validation_data(
+             val_data_config=params['validation_ds'])  # Reload it
+         trainer.fit(nemo_model)  # Fit
+
+         # Save it temporarily
+         save_path = os.path.join(res_exp_dir, f"_nemo_model_part_{i}.nemo")
+         nemo_model.save_to(save_path)
+     return trainer, nemo_model
+
+
+ # =======================
+ def run_main_in_notebook():
+     global data, dev_data, res_exp_dir
+     trainer, nemo_model = train_multi_turn(data, dev_data)  # Train with data
+
+     # ==========================================================================
+     # Get paths from the checkpoint callback
+     # (last in the list of callbacks since it was added last)
+     ckpt_callback = trainer.callbacks[-1]
+     best_paths = list(ckpt_callback.best_k_models.keys())
+
+     # Reload and save
+     nemo_model, avg_weights = reload_nemo_from_avg(
+         best_paths=best_paths, nemo_model=nemo_model)
+     avg_ckpt_path = f"{res_exp_dir}/model_avg.ckpt"
+     nemo_model_path = f"{res_exp_dir}/nemo_model_avg.nemo"
+     save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path)
+
+     # Generate, score, and save for each mfpath
+     nemo_inference_for_mfpath(nemo_model, config_dict['dev_path'])
+     nemo_inference_for_mfpath(nemo_model, config_dict['test_path'])
+     nemo_inference_for_mfpath(nemo_model, config_dict['full_val_path'])
+     push_exp_dir_to_hub(res_exp_dir)
+
+
+ if __name__ == "__main__":
+     run_main_in_notebook()
maintab_fast_conformer_10-09_01-54/code-folder/Demo/utils.py ADDED
@@ -0,0 +1,269 @@
+ import copy
+ import csv
+ import glob
+ import json
+ import logging
+ import logging as log
+ import os
+ import random
+ import re
+ import shutil
+ import string
+ import sys
+ import unicodedata
+
+ import jiwer
+ import lightning.pytorch as pl
+ import nemo
+ import nemo.collections.asr as nemo_asr
+ import numpy as np
+ import torch
+ from datasets import load_dataset
+ from jiwer import wer
+ from lightning.pytorch.callbacks import Callback, EarlyStopping, ModelCheckpoint
+ from lightning.pytorch.utilities.model_summary import ModelSummary
+ from omegaconf import OmegaConf
+ from scipy.io import wavfile
+
+ from V0_Import.import_src import push_file_to_hub
+
+
+ class LossLogger(Callback):
+     def __init__(self, exp_dir):
+         super().__init__()
+         self.train_losses = []
+         self.val_losses = []
+         self.train_wer = []
+         self.val_wer = []
+         self.num_last = 100  # ? epoch unit
+         self.num_plot = 100  # ? epoch
+         self.allow_show_plot = False  # ? Allow showing the plot in a notebook
+         self.exp_dir = exp_dir
+
+     def on_train_epoch_end(self, trainer, pl_module):
+         train_loss = trainer.callback_metrics.get('train_loss')
+         epoch_idx = trainer.current_epoch
+         lr = trainer.optimizers[0].param_groups[0]['lr']  # Print lr
+         log.info(f"Epoch {epoch_idx} ended." + "=" * 100)
+         if train_loss is not None:
+             self.train_losses.append(train_loss.item())
+             log.info(f"Train Loss: {train_loss.item()}, lr: {lr}")
+
+         if epoch_idx != 0 and epoch_idx % self.num_plot == 0:
+             self._plot_train()
+
+     def on_validation_epoch_end(self, trainer, pl_module):
+         val_loss = trainer.callback_metrics.get('val_loss')
+         val_wer = trainer.callback_metrics.get('val_wer')
+         if val_loss is not None:
+             self.val_losses.append(val_loss.item())
+             log.info(f"Validation Loss: {val_loss.item()}")
+         if val_wer is not None:
+             self.val_wer.append(val_wer.item())
+             log.info(f"Validation WER: {val_wer.item()}")
+
+     def _plot_train(self):
+         import matplotlib.pyplot as plt
+         plt.figure(figsize=(10, 6))
+         plt.subplot(2, 1, 1)
+         num = self.num_last
+         plt.plot(self.train_losses[-num:], label='Training Loss')
+         plt.plot(self.val_losses[-num:], label='Validation Loss')
+         plt.xlabel('Epoch')
+         plt.ylabel('Loss')
+         plt.legend()
+         plt.title('Training and Validation Loss')
+
+         plt.subplot(2, 1, 2)
+         plt.plot(self.train_wer[-num:], label='Training WER')
+         plt.plot(self.val_wer[-num:], label='Validation WER')
+         plt.xlabel('Epoch')
+         plt.ylabel('WER')
+         plt.legend()
+         plt.title('Training and Validation WER')
+         plt.tight_layout()
+         # allow_show_plot = True  # Allow showing the plot in a notebook
+         if self.allow_show_plot:
+             plt.show()
+         else:
+             plot_png = os.path.join(
+                 self.exp_dir, f"training_process_{len(self.val_wer)}.png")
+             plt.savefig(plot_png)
+             push_file_to_hub(plot_png)
+
+     def on_train_end(self, trainer, pl_module):
+         self.num_last = len(self.val_wer)
+         self._plot_train()
+
+
+ config_path = "Fast_conformer_nemo/configs/fast_conformer.yaml"  # ? NOTE: Setting
+ res_exp_dir = "results_fast_conformer"  # ? NOTE: Setting
+ os.makedirs(res_exp_dir, exist_ok=True)
+ src_folder = "Fast_conformer_nemo"
+ dst_folder = os.path.join(res_exp_dir, "code-folder")
+ shutil.copytree(src_folder, dst_folder, dirs_exist_ok=True)
+ log.info(f"Copied code to {dst_folder}")
+
+
+ def write_txt_exp_dir(name, var):
+     path = os.path.join(res_exp_dir, name)
+     with open(path, "w", encoding="utf-8") as f:
+         f.write(str(var))
+
+
+ # ==============================================================================
+
+
+ def create_time_callbacks(num_keep, min_stop, max_hour):
+     # num_keep = 500
+     early_stop_callback = EarlyStopping(
+         monitor="val_wer",  # Metric to monitor
+         mode="min",  # Lower is better
+         stopping_threshold=min_stop,  # Stop if val_wer < min_stop
+         patience=num_keep,  # Epochs to wait while val_wer does not improve
+         verbose=True
+     )
+     # Keep the top 5 checkpoints based on val_wer
+     num_avg = 5
+     save_last = False
+     checkpoint_callback = ModelCheckpoint(
+         dirpath=f"{res_exp_dir}/ckpts",  # Dir of ckpts
+         filename="epoch{epoch}-valwer{val_wer:.4f}",
+         monitor="val_wer",
+         mode="min",
+         save_top_k=num_avg,  # Only keep the 5 best
+         save_last=save_last,  # Also save the last epoch: False
+     )
+     # max_time_training = "00:09:00:00"
+     max_time_training = f"00:{max_hour}:02:00"
+     callback_list = [LossLogger(res_exp_dir),
+                      early_stop_callback, checkpoint_callback]
+     return max_time_training, callback_list
+
+
+ def create_new_trainer(epochs, min_stop, max_hour="09"):
+     # NOTE: Setting
+     max_hour = "09"  # ! Must edit when run
+     log.info(f"Hours to train: {max_hour}")
+     setting = {
+         'num_keep': 500,
+         'precision': 'bf16',  # ! Use AMP
+         'accumulate_grad_batches': 1,
+         'max_hour': max_hour,
+         'enable_progress_bar': False,  # ! Disable the progress bar for shorter logs
+     }
+     # Create callbacks
+     max_time_training, callback_list = create_time_callbacks(
+         num_keep=setting['num_keep'], min_stop=min_stop, max_hour=max_hour)
+     # Training args
+     trainer_dict = {
+         # Hardware
+         'precision': setting['precision'],  # Trade-off
+         'devices': 1,
+         'num_nodes': 1,
+         'accelerator': 'gpu',
+         'strategy': 'auto',  # Single GPU: no multi-GPU strategy
+         # Training
+         'max_epochs': epochs,
+         'accumulate_grad_batches': setting['accumulate_grad_batches'],
+         'gradient_clip_val': 0.0,
+         # Prediction monitor
+         'log_every_n_steps': 100,  # Logging interval within a training epoch
+         'val_check_interval': 1.0,  # Compute WER once per epoch
+         # Unrelated
+         'enable_progress_bar': setting['enable_progress_bar'],
+         'num_sanity_val_steps': 0,
+         'check_val_every_n_epoch': 1,
+         'sync_batchnorm': True,
+         # If True, enables cudnn benchmarking for faster training.
+         'benchmark': False,
+         # Saving and callbacks: new settings for callbacks
+         'enable_checkpointing': True,
+         'max_time': max_time_training,
+         'callbacks': callback_list,
+     }
+     write_txt_exp_dir("args_trainer.txt", trainer_dict)
+     trainer = pl.Trainer(**trainer_dict)
+     return trainer
+
+
+ # ==============================================================================
+
+
+ def reload_nemo_from_avg(best_paths, nemo_model):
+     w_only = False  # NOTE: weights_only=True raised an error, so use False
+     load_strict = False
+
+     def average_checkpoints(paths):
+         avg_state_dict = None
+         for path in paths:
+             ckpt = torch.load(path, map_location="cpu",
+                               weights_only=w_only)["state_dict"]
+             if avg_state_dict is None:
+                 avg_state_dict = {k: v.clone() for k, v in ckpt.items()}
+             else:
+                 for k in avg_state_dict:
+                     # if it's int/bool, leave as-is
+                     if torch.is_floating_point(avg_state_dict[k]):
+                         avg_state_dict[k] += ckpt[k]
+         for k in avg_state_dict:
+             if torch.is_floating_point(avg_state_dict[k]):
+                 avg_state_dict[k] /= len(paths)
+         return avg_state_dict
+
+     # Average
+     log.info(f"\n\nBest paths for AVG(model): {best_paths}")
+     avg_weights = average_checkpoints(best_paths)
+     # Assign averaged weights to the NeMo model
+     nemo_model = nemo_model.to("cuda" if torch.cuda.is_available() else "cpu")
+     nemo_model.load_state_dict(avg_weights, strict=load_strict)
+     return nemo_model, avg_weights
+
+
+ def save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path):
+     torch.save({"state_dict": avg_weights}, avg_ckpt_path)
+     nemo_model.save_to(nemo_model_path)
+     log.info(f"\n\nSaved avg weights (.ckpt) at {avg_ckpt_path}")
+     log.info(f"Saved averaged NeMo model at {nemo_model_path}")
+
+
+ def nemo_inference_for_mfpath(nemo_model, mfpath):
+     def save_gen_list(text_list, gt_list):
+         random_name = ''.join(random.choices(
+             string.ascii_lowercase + string.digits, k=8))
+         file_path = f"{random_name}.csv"
+         # Save under the random name
+         file_path = os.path.join(res_exp_dir, file_path)
+         log.info(f"Saved gen at {file_path}")
+         # Write it as .csv
+         with open(file_path, mode="w", newline="", encoding="utf-8") as f:
+             writer = csv.writer(f)
+             writer.writerow(["Gen", "GT"])  # header
+             for first, second in zip(text_list, gt_list):
+                 writer.writerow([first, second])
+
+     with open(mfpath, "r", encoding="utf-8") as fin:
+         data = [json.loads(line) for line in fin]
+     log.info(f"\n\nLoaded {len(data)} entries from {mfpath}")
+
+     references = []
+     predictions = []
+     for entry in data:  # Limit data if needed
+         ref = entry['text']
+         audio_path = entry['audio_filepath']
+         with torch.no_grad():
+             pred = nemo_model.transcribe(audio_path, verbose=False)[0].text
+         # if use_norm:
+         #     pred = normalize_text_vietnamese(pred)
+         references.append(ref)
+         predictions.append(pred)
+
+     # Compute WER
+     wer_score = wer(references, predictions)
+     log.info(f"WER: {wer_score}")
+
+     # Save the predictions
+     save_gen_list(text_list=predictions, gt_list=references)
+     return wer_score
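
For reference, the jiwer.wer call above computes corpus-level word error rate over paired reference/prediction lists; a standalone sketch with invented toy strings:

from jiwer import wer

# Toy example (invented strings): total word edits / total reference words.
references = ["the cat sat on the mat", "hello world"]
predictions = ["the cat sat on mat", "hello word"]
print(wer(references, predictions))  # 0.25 = 2 errors over 8 reference words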
maintab_fast_conformer_10-09_01-54/code-folder/__pycache__/train.cpython-311.pyc ADDED
Binary file (16 kB).
 
maintab_fast_conformer_10-09_01-54/code-folder/configs/fast_conformer.yaml ADDED
@@ -0,0 +1,271 @@
+ # It contains the default values for training a Fast Conformer-CTC ASR model, large size (~120M), with CTC loss and sub-word encoding.
+
+ # You may find more info about FastConformer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#fast-conformer
+
+ # We suggest using trainer.precision=bf16 for GPUs which support it; otherwise trainer.precision=16 is recommended.
+ # Using bf16 or 16 makes it possible to double the batch size and speed up training/inference. If fp16 is not stable and the model diverges after some epochs, you may use fp32.
+ # Here are the suggested batch sizes per GPU for each precision and memory size:
+ #! fp16 >> 32 --> batch_size = 16
+ # +-----------+------------+------------+
+ # | Precision | GPU Memory | Batch Size |
+ # +===========+============+============+
+ # | 32        | 16GB       | 16         |
+ # |           | 32GB       | 32         |
+ # |           | 80GB       | 64         |
+ # +-----------+------------+------------+
+ # | fp16 or   | 16GB       | 32         |
+ # | bf16      | 32GB       | 64         |
+ # |           | 80GB       | 128        |
+ # +-----------+------------+------------+
+ # Here are the recommended configs for different variants of FastConformer-CTC-BPE; other parameters are the same as in this config file.
+ #
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Model          | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling |
+ # +================+=========+=========+==========+==================+==============+==========================+=================+==========+
+ # | Small (14M)    | 176     | 4       | 16       | 9                | 0.0          | 320                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Medium (32M)   | 256     | 4       | 16       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | Large (120M)   | 512     | 8       | 17       | 9                | 1e-3         | 640                      | 1               | True     |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XLarge (616M)  | 1024    | 8       | 24       | 9                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+ # | XXLarge (1.2B) | 1024    | 8       | 42       | 5                | 1e-3         | 640                      | 2               | False    |
+ # +----------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+----------+
+
+ # Note: these are based on the assumption of a max_duration of 20. If you have a longer or shorter max_duration, then batch sizes may need to be updated accordingly.
+
+ # Default learning parameters in this config are set for a global batch size of 2K, while you may use lower values.
+ # To increase the global batch size with a limited number of GPUs, you may use a higher accumulate_grad_batches.
+ # However, accumulate_grad_batches is better avoided as long as the global batch size is large enough and training is stable.
+
+ name: "FastConformer-CTC-BPE"
+ train_path: "V1_Setup/Out/1p/train_ds.json" # ! Must edit when run
+ dev_path: "V1_Setup/Out/1p/test_ds_2x5.json"
+ test_path: "V1_Setup/Out/1p/test_ds.json"
+ vocab_dir: "V1_Setup/Out/1p/" #* correct
+ full_val_path: "V1_Setup/Out/1p/dev_ds.json"
+
+ train_ds_batch_size: 16
+ gen_ds_batch_size: 16
+ ds_max_duration: 32.0 #! Will be replaced
+
+ # Model parameters
+ d_model: 256
+ n_heads: 4
+ n_layers: 16
+ conv_kernel_size: 9
+ xscaling: true
+
+ # Optimize
+ lr: 1e-3 #! Select lr and decay = 0.0, sched, warm_step
+ weight_decay: 1e-3
+ warmup_steps: 15000
+ aug_time_masks: 2 # Changed from 10 for faster, smoother optimization
+
+ # Training parameters # Don't use
+ num_epochs: 1000
+ precision: 32
+ accumulate_grad_batches: 1
+ subsampling_factor: 8
+
+ model:
+   sample_rate: 16000
+   log_prediction: false #! (changed from true to false) enables logging sample predictions in the output during training
+   ctc_reduction: "mean_volume"
+   skip_nan_grad: false
+
+   train_ds:
+     manifest_filepath: ${train_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${train_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: true
+     num_workers: 8
+     pin_memory: true
+     max_duration: ${ds_max_duration} # it is set for LibriSpeech, you may need to update it for your dataset
+     min_duration: 0.1
+     # tarred datasets
+     is_tarred: false
+     tarred_audio_filepaths: null
+     shuffle_n: 2048
+     # bucketing params
+     bucketing_strategy: "fully_randomized"
+     bucketing_batch_size: null
+
+   validation_ds:
+     manifest_filepath: ${dev_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   test_ds:
+     manifest_filepath: ${test_path}
+     sample_rate: ${model.sample_rate}
+     batch_size: ${gen_ds_batch_size} # you may increase batch_size if your memory allows
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+
+   # recommend a vocab size of 128 or 256 when training on ~1k hr datasets and a 1k vocab size on 10+k hr datasets
+   # you may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py
+   tokenizer:
+     dir: ${vocab_dir} # path to directory which contains either tokenizer.model (bpe) or vocab.txt (wpe)
+     type: wpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)
+
+   preprocessor:
+     _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
+     sample_rate: ${model.sample_rate}
+     normalize: "per_feature"
+     window_size: 0.025
+     window_stride: 0.01
+     window: "hann"
+     features: 80
+     n_fft: 512
+     log: true
+     frame_splicing: 1
+     dither: 0.00001
+     pad_to: 0
+     pad_value: 0.0
+
+   spec_augment:
+     _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+     freq_masks: 2 # set to zero to disable it
+     # you may use lower time_masks for smaller models to have faster convergence
+     time_masks: ${aug_time_masks} # ! set to zero to disable it, from 10
+     freq_width: 27
+     time_width: 0.05
+
+   encoder:
+     _target_: nemo.collections.asr.modules.ConformerEncoder
+     feat_in: ${model.preprocessor.features}
+     feat_out: -1 # you may set it if you need a different output size than the default d_model
+     n_layers: ${n_layers}
+     d_model: ${d_model}
+
+     # Sub-sampling params
+     subsampling: dw_striding # vggnet, striding, stacking or stacking_norm, dw_striding
+     subsampling_factor: ${subsampling_factor} # ! must be a power of 2 for striding and vggnet
+     subsampling_conv_channels: 256 # -1 sets it to d_model = 256
+     causal_downsampling: false
+
+     # Feed forward module's params
+     ff_expansion_factor: 4
+
+     # Multi-headed Attention Module's params
+     self_attention_model: rel_pos # rel_pos or abs_pos
+     n_heads: ${n_heads} # may need to be lower for smaller d_models
+     # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention
+     att_context_size: [-1, -1] # -1 means unlimited context
+     att_context_style: regular # regular or chunked_limited
+     xscaling: ${xscaling} # scales up the input embeddings by sqrt(d_model)
+     untie_biases: true # unties the biases of the TransformerXL layers
+     pos_emb_max_len: 5000
+     use_pytorch_sdpa: false #! use torch sdpa instead of manual attention
+     use_pytorch_sdpa_backends: [] # empty list means all backends https://pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html e.g. [MATH]
+
+     # Convolution module's params
+     conv_kernel_size: ${conv_kernel_size}
+     conv_norm_type: "batch_norm" # batch_norm or layer_norm or groupnormN (N specifies the number of groups)
+     # conv_context_size can be "causal" or a list of two integers such that conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size
+     # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0]
+     conv_context_size: null
+
+     ### regularization
+     dropout: 0.1 # The dropout used in most of the Conformer Modules
+     dropout_pre_encoder: 0.1 # The dropout used before the encoder
+     dropout_emb: 0.0 # The dropout used for embeddings
+     dropout_att: 0.1 # The dropout for multi-headed attention modules
+
+     # set to non-zero to enable stochastic depth
+     stochastic_depth_drop_prob: 0.0
+     stochastic_depth_mode: linear # linear or uniform
+     stochastic_depth_start_layer: 1
+
+   decoder:
+     _target_: nemo.collections.asr.modules.ConvASRDecoder
+     feat_in: null
+     num_classes: -1
+     vocabulary: []
+
+   # config for InterCTC loss: https://arxiv.org/abs/2102.03216
+   # specify loss weights and which layers to use for InterCTC
+   # e.g., to reproduce the paper results, set loss_weights: [0.3]
+   # and apply_at_layers: [8] (assuming 18 layers). Note that the final
+   # layer loss coefficient is automatically adjusted (to 0.7 in the above example)
+   interctc:
+     loss_weights: []
+     apply_at_layers: []
+
+   optim:
+     name: adamw
+     # lr: 1e-3 #! Select lr and decay, sched, warm_step
+     lr: ${lr}
+     # optimizer arguments
+     betas: [0.9, 0.98]
+     # less need for weight_decay as we already have large augmentations with SpecAug
+     # you may need weight_decay for large models, stable AMP training, small datasets, or when lower augmentations are used
+     # weight decay of 0.0 with lr of 2.0 also works fine
+     weight_decay: ${weight_decay}
+
+     # scheduler setup
+     sched:
+       name: CosineAnnealing
+       # scheduler config override
+       # warmup_steps: 15000
+       warmup_steps: ${warmup_steps}
+       warmup_ratio: null
+       min_lr: 1e-4
+
+ # ==============================================================================
+ # Don't use
+
+ trainer:
+   devices: -1 # number of GPUs, -1 would use all available GPUs
+   num_nodes: 1
+   max_epochs: ${num_epochs}
+   max_steps: -1 # computed at runtime if not set
+   val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for a number of iterations
+   accelerator: auto
+   strategy:
+     _target_: lightning.pytorch.strategies.DDPStrategy
+     gradient_as_bucket_view: true
+   accumulate_grad_batches: ${accumulate_grad_batches}
+   gradient_clip_val: 0.0
+   precision: ${precision} # 16, 32, or bf16
+   log_every_n_steps: 10 # Interval of logging.
+   enable_progress_bar: True
+   num_sanity_val_steps: 0 # number of validation steps to run as a sanity check before training; setting it to 0 disables it
+   check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs
+   sync_batchnorm: true
+   enable_checkpointing: False # Provided by exp_manager
+   logger: false # Provided by exp_manager
+   benchmark: false # needs to be false for models with variable-length speech input as it slows down training
+
+ # ==============================================================================
+
+ exp_manager:
+   exp_dir: null
+   name: ${name}
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   checkpoint_callback_params:
+     # in case of multiple validation sets, the first one is used
+     monitor: "val_wer"
+     mode: "min"
+     save_top_k: 5
+     always_save_nemo: True # saves the checkpoints as .nemo files instead of PTL checkpoints
+
+   resume_from_checkpoint: null # The path to a checkpoint file to continue the training; restores the whole state including the epoch, step, LR schedulers, apex, etc.
+   # you need to set these two to True to continue the training
+   resume_if_exists: false
+   resume_ignore_no_checkpoint: false
+
+   # You may use this section to create a W&B logger
+   create_wandb_logger: false
+   wandb_logger_kwargs:
+     name: null
+     project: null
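
The comments in this config note that the default learning parameters assume a global batch size of ~2K; the effective global batch size of a run is per-GPU batch size × devices × accumulate_grad_batches. A worked example with this config's values (devices=1, as in create_new_trainer in utils.py):

# Global-batch-size arithmetic referenced in the config comments.
per_gpu_batch = 16             # train_ds_batch_size
devices = 1                    # single GPU (see create_new_trainer in utils.py)
accumulate_grad_batches = 1
global_batch = per_gpu_batch * devices * accumulate_grad_batches
print(global_batch)  # 16 -- far below the ~2K the default LR schedule assumes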
maintab_fast_conformer_10-09_01-54/code-folder/train.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ # Have first: V0 -> this
8
+ if True:
9
+ sys.path.append(os.getcwd())
10
+ if True:
11
+ from V0_Import.import_src import *
12
+ if True:
13
+ from V2_Run.Fast_conformer_nemo.utils.utils import *
14
+ run_import_src = True
15
+
16
+
17
+ # ==============================================================================
18
+ # Ultil functions
19
+ # Dont need to edit, please
20
+
21
+
22
+ def set_all_seeds(seed=42):
23
+ random.seed(seed)
24
+ np.random.seed(seed)
25
+ torch.manual_seed(seed)
26
+ torch.cuda.manual_seed_all(seed)
27
+
28
+
29
+ def get_config_nemo():
30
+ # config_path = "Fast_conformer_nemo/configs/fast_conformer.yaml"
31
+ # if len(sys.argv) >= 2:
32
+ # config_path = sys.argv[1]
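+ # config_path is a module-level global pulled in by the star import from utils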
33
+ print(config_path, flush=True)
34
+ with open(config_path, "r", encoding="utf-8") as f:
35
+ yaml_text = f.read().strip()
36
+
37
+ _config = OmegaConf.create(yaml_text)
38
+ config_dict = OmegaConf.to_container(_config, resolve=True)
39
+ print(f"\n{config_dict['model']}\n{config_dict}", flush=True)
40
+ return config_dict
41
+
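+ # Note: OmegaConf.to_container(..., resolve=True) expands interpolations such as
+ # "${num_epochs}" in the YAML into concrete values before the dict is used.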
42
+
43
+ def get_train_dev_test(path):  # loads one JSONL manifest (train is full, dev is 300, test is full)
44
+ data = []
45
+ print(path, flush=True)
46
+ with open(path, "r", encoding="utf-8") as f:
47
+ for line in f:
48
+ if line.strip():
49
+ dic = json.loads(line)
50
+ data.append(dic)
51
+ return data
52
+
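+ # Each manifest line is one JSON object; an illustrative (hypothetical) entry:
+ # {"audio_filepath": "clips/0001.wav", "duration": 3.2, "text": "016 017 100"}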
53
+
54
+ def get_info(train_path):
55
+ data = []
56
+ max_dur = 0.0
57
+ max_lens = []
58
+ words = []
59
+ with open(train_path, "r", encoding="utf-8") as f:  # same loading loop as above
60
+ for line in f:
61
+ if line.strip():
62
+ dic = json.loads(line)
63
+ data.append(dic)
64
+ max_dur = max(max_dur, dic["duration"])
65
+ max_lens.append(len(str(dic["text"]).split()))
66
+ words.extend(str(dic["text"]).split())
67
+
68
+ words = sorted(set(words))  # dedupe first, then sort
69
+ return data, max_dur, max_lens, len(words)
70
+
71
+
72
+ # ==============================================================================
73
+ # Set global vars and data
74
+ if run_import_src:
75
+ set_all_seeds(42) # Set random seed for reproducibility
76
+ config_dict = get_config_nemo() # Get from path
77
+ data, max_dur, max_lens, len_vocab = get_info(config_dict['train_path'])
78
+ sample_rate = 16000
79
+ dev_data, test_data = get_train_dev_test(
80
+ config_dict['dev_path']), get_train_dev_test(config_dict['test_path'])
81
+ full_val_data = get_train_dev_test(config_dict['full_val_path'])
82
+
83
+ log.info("Overall checking:")
84
+ log.info(f"Number of samples in manifest: {len(data)}")
85
+ log.info(
86
+ f"- Max duration in manifest: {max_dur:.2f} seconds, sample rate: {sample_rate}")
87
+ log.info(
88
+ f"- Vocab in manifest: {len_vocab} units, max length (L tokens): {np.mean(max_lens):.2f}, {max(max_lens)}")
89
+ log.info(f"Number of dev is {len(dev_data)}, test is {len(test_data)}")
90
+
91
+
92
+ # ==============================================================================
93
+ # Create params, conf_model, and manifest paths, then adjust settings
94
+ if run_import_src:
95
+ params = copy.deepcopy(config_dict)['model']
96
+ train_mfpath = os.path.join(res_exp_dir, "_train_mf.json")
97
+ dev_mfpath = os.path.join(res_exp_dir, "_dev_mf.json")
98
+
99
+ for ds in ['train_ds', 'validation_ds']: # Change train and dev temporarily
100
+ if ds == 'train_ds':
101
+ params[ds]['manifest_filepath'] = train_mfpath
102
+ params[ds]['max_duration'] = round(max_dur + 0.1, 2)
103
+ else:
104
+ params[ds]['manifest_filepath'] = dev_mfpath
105
+ log.info(
106
+ f"Changed params['train_ds']['max_duration'] = {round(max_dur + 0.1, 2)} seconds \nand {train_mfpath}, {dev_mfpath} to write data!")
107
+
108
+ # Create an OmegaConf object from the dictionary
109
+ conf_model = OmegaConf.create(params)
110
+ write_txt_exp_dir("conf_model.txt", conf_model)
111
+ # Capture the git commit, Python version, and pip freeze output as one string
112
+ pip_freeze_str = subprocess.check_output(
113
+ ["git", "log", "-1",
114
+ '--pretty=format:%h %s%nAuthor date: %ad%nCommit date: %cd', "--date=iso"],
115
+ text=True
116
+ ) + "\n"
117
+ pip_freeze_str += subprocess.check_output(
118
+ ["python", "--version"], text=True) + "\n"
119
+ pip_freeze_str += subprocess.check_output(
120
+ ["pip", "freeze"], text=True) + "\n"
121
+ write_txt_exp_dir("git_pip_env.txt", pip_freeze_str)
122
+
123
+ # Some for training info
124
+ log.info(f"train_ds_batch_size: {config_dict['train_ds_batch_size']}")
125
+ log.info(
126
+ f"warmup_steps: {config_dict['warmup_steps']}, lr: {config_dict['lr']}, weight_decay: {config_dict['weight_decay']}")
127
+ # log.info(f"subsampling_factor: {config_dict['subsampling_factor']}")
128
+
129
+
130
+ # ==============================================================================
131
+ # Create subset from part and write it to global mfpath
132
+
133
+
134
+ def create_train_dev(root_train_data: list, begin_idx, end_idx, num_train, num_dev, root_dev_data=None):
135
+ global train_mfpath, dev_mfpath
136
+
137
+ def get_min_max_duration(data):
138
+ mi, ma = 100.0, 0.0
139
+ for dic in data:
140
+ mi = min(mi, dic["duration"])
141
+ ma = max(ma, dic["duration"])
142
+ return mi, ma
143
+
144
+ def split_data(data: list, k):
145
+ if k >= len(data):
146
+ return data
147
+ _train_data, _test_data = train_test_split(
148
+ data, test_size=k, random_state=42
149
+ )
150
+ return _test_data
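+ # Behavior sketch: split_data(data, 100) returns 100 items drawn reproducibly
+ # (random_state=42) from data, or data itself when k >= len(data).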
151
+
152
+ # # Sort by distribution
153
+ # data = list(sorted(root_train_data, key=lambda x: int(
154
+ # os.path.basename(x["audio_filepath"]).split("_")[0])))
155
+
156
+ # Select part
157
+ data = root_train_data.copy()
158
+ begin_idx = max(0, begin_idx)
159
+ end_idx = min(end_idx, len(data))
160
+ data = data[begin_idx:end_idx]
161
+ log.info(f"- Duration of this part: [{get_min_max_duration(data)}]")
162
+
163
+ # Select random subset/set from part: train from data, and dev from train or is root_dev
164
+ from tqdm import tqdm
165
+ num_train = min(num_train, len(data))
166
+ train_data = split_data(data, num_train)
167
+ if root_dev_data is None:
168
+ num_dev = min(num_dev, num_train)
169
+ dev_data = split_data(train_data, num_dev)
170
+ else:
171
+ dev_data = list(root_dev_data).copy()
172
+
173
+ # Write subset data back to a new file (or overwrite)
174
+ dev_in_train = root_dev_data is None
175
+ log.info(
176
+ f"- Number of train is {len(train_data)}, dev is {len(dev_data)}, dev in train: {dev_in_train}")
177
+ log.info(f"\n{train_data[0]}\n{dev_data[0]}\n")
178
+
179
+ with open(train_mfpath, "w", encoding="utf-8") as fout:
180
+ for item in train_data:
181
+ fout.write(json.dumps(item, ensure_ascii=False) + "\n")
182
+
183
+ with open(dev_mfpath, "w", encoding="utf-8") as fout:
184
+ for item in dev_data:
185
+ fout.write(json.dumps(item, ensure_ascii=False) + "\n")
186
+
187
+
188
+ # ==============================================================================
189
+ # Demo the NeMo model first
190
+ def init_nemo_model(data, dev_data, conf_model):
191
+ log.info(f"\n\nInit nemo model:")
192
+ create_train_dev(root_train_data=data, begin_idx=0, end_idx=len(data), num_train=len(
193
+ data), num_dev=-1, root_dev_data=dev_data)  # Demo data creation
194
+ trainer = create_new_trainer(epochs=1000, min_stop=0.0)  # Demo trainer
195
+
196
+ # trainer.fit(nemo_model)
197
+ nemo_model = nemo_asr.models.EncDecCTCModelBPE(
198
+ cfg=conf_model, trainer=trainer)
199
+
200
+ # print(nemo_model)
201
+ summary = ModelSummary(nemo_model)
202
+ print(summary)
203
+ write_txt_exp_dir("model_mp.txt", summary) # Write MP of model
204
+ check_tokenizer(nemo_model)
205
+ return nemo_model
206
+
207
+
208
+ def check_tokenizer(nemo_model):
209
+ # Inspect the tokenizer inside your model
210
+ print("Tokenizer type:", nemo_model.tokenizer.__class__.__name__)
211
+ print("Vocab size:", nemo_model.tokenizer.vocab_size)
212
+
213
+ # Show first few vocab tokens
214
+ # if hasattr(nemo_model.tokenizer, "vocab"):
215
+ # print("First tokens:", list(nemo_model.tokenizer.vocab.keys())[:20])
216
+
217
+ # Test text → ids → text round-trip
218
+ sample = "016 017 100"
219
+ ids = nemo_model.tokenizer.text_to_ids(sample)
220
+ print("Text:", sample)
221
+ print("IDs:", ids)
222
+ print("Back to text:", nemo_model.tokenizer.ids_to_text(ids))
223
+
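+ # Illustrative round-trip output (the IDs are hypothetical; they depend on the
+ # trained WPE vocab):
+ # Text: 016 017 100
+ # IDs: [41, 7, 112]
+ # Back to text: 016 017 100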
224
+
225
+ # ==============================================================================
226
+ # Train model: Multi part training
227
+
228
+
229
+ def train_multi_turn(train_data, dev_data):
230
+ # Init nemo model
231
+ set_all_seeds(42)
232
+ global params, conf_model, res_exp_dir
233
+ nemo_model = init_nemo_model(
234
+ data=train_data, dev_data=dev_data, conf_model=conf_model)
235
+
236
+ # ! Setting for multi part training
237
+ train_data = list(sorted(train_data, key=lambda x: x["duration"]))
238
+
239
+ cnt = len(train_data)
240
+ setting = {
241
+ "begin_idx": [0, 0],
242
+ "end_idx": [5000, cnt],
243
+ "num_train": [1000, cnt],
244
+ "num_dev": [100, -1],
245
+ "epochs": [200, 1000], # ! Not run enough epochs
246
+ "min_stop": [0.4, 0.0] # ! 0.4 --> 0.25
247
+ }
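+ # Reading the schedule: part 0 warms up on 1000 of the 5000 shortest clips
+ # (train_data is sorted by duration) with a 100-sample dev split, stopping once
+ # val_wer < 0.4; part 1 then trains on the full set against the external dev
+ # data, with a threshold of 0.0 so it runs to the epoch/time limit.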
248
+ num_part = len(setting["begin_idx"])
249
+ trainer = None
250
+
251
+ for i in range(num_part):
252
+ begin_idx = setting["begin_idx"][i]
253
+ end_idx = setting["end_idx"][i]
254
+ num_train = setting["num_train"][i]
255
+ num_dev = setting["num_dev"][i]
256
+ num_epochs = setting["epochs"][i]
257
+ min_stop = setting["min_stop"][i]
258
+ log.info(
259
+ f"\n\n Here {i}: {begin_idx} --> {end_idx} | {num_train}, {num_dev}, {cnt} | {num_epochs}, {min_stop}")
260
+
261
+ # Create train dev file
262
+ if num_dev <= 0:
263
+ create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
264
+ num_train=num_train, num_dev=num_dev, root_dev_data=dev_data)
265
+ else:
266
+ create_train_dev(root_train_data=train_data, begin_idx=begin_idx, end_idx=end_idx,
267
+ num_train=num_train, num_dev=num_dev)
268
+
269
+ # Create a fresh trainer
270
+ trainer = create_new_trainer(
271
+ epochs=num_epochs, min_stop=min_stop)
272
+
273
+ nemo_model.setup_training_data(
274
+ train_data_config=params['train_ds']) # Reload it
275
+ nemo_model.setup_validation_data(
276
+ val_data_config=params['validation_ds']) # Reload it
277
+ trainer.fit(nemo_model) # Fit
278
+
279
+ # Save it temporarily
280
+ save_path = os.path.join(res_exp_dir, f"_nemo_model_part_{i}.nemo")
281
+ nemo_model.save_to(save_path)
282
+ return trainer, nemo_model
283
+
284
+
285
+ # ==============================================================================
286
+ # No need to edit this section, please
287
+
288
+
289
+ def run_main_in_notebook():
290
+ global data, dev_data, res_exp_dir
291
+ trainer, nemo_model = train_multi_turn(data, dev_data) # Train with data
292
+
293
+ # ==========================================================================
294
+ # Get paths from checkpoint callback
295
+ # The checkpoint callback is last in the list because it was added last
296
+ ckpt_callback = trainer.callbacks[-1]
297
+ best_paths = list(ckpt_callback.best_k_models.keys())
298
+
299
+ # Reload and save
300
+ nemo_model, avg_weights = reload_nemo_from_avg(
301
+ best_paths=best_paths, nemo_model=nemo_model)
302
+ avg_ckpt_path = f"{res_exp_dir}/model_avg.ckpt"
303
+ nemo_model_path = f"{res_exp_dir}/nemo_model_avg.nemo"
304
+ save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path)
305
+
306
+ # Gen, score and save for mfpath
307
+ nemo_inference_for_mfpath(nemo_model, config_dict['dev_path'])
308
+ nemo_inference_for_mfpath(nemo_model, config_dict['test_path'])
309
+ nemo_inference_for_mfpath(nemo_model, config_dict['full_val_path'])
310
+ push_exp_dir_to_hub(res_exp_dir)
311
+ return trainer, nemo_model
312
+
313
+
314
+ if __name__ == "__main__":
315
+ run_main_in_notebook()
maintab_fast_conformer_10-09_01-54/code-folder/utils/__pycache__/utils.cpython-311.pyc ADDED
Binary file (15.9 kB).
maintab_fast_conformer_10-09_01-54/code-folder/utils/install_cmd.txt ADDED
@@ -0,0 +1,22 @@
1
+ run_cmd("pip install -q datasets==3.6.0")
2
+ run_cmd("pip install -q numpy==1.26.4")
3
+ run_cmd("pip install -q scipy==1.15.2")
4
+ run_cmd("pip install -q omegaconf==2.3.0")
5
+
6
+
7
+ run_cmd("pip install -q torch==2.6.0+cu124")
8
+ run_cmd("pip install -q accelerate==1.5.2")
9
+ run_cmd("pip install -q lightning==2.4.0")
10
+ run_cmd("pip install -q lightning-utilities==0.14.3")
11
+ run_cmd("pip install -q pytorch-lightning==2.5.1.post0")
12
+ run_cmd("pip install -q jiwer==4.0.0")
13
+
14
+
15
+ run_cmd("apt-get update && apt-get install -y libsndfile1 ffmpeg")
16
+ run_cmd("pip install Cython")
17
+ run_cmd("pip install packaging")
18
+ run_cmd("pip -q install nemo_toolkit['asr']")
19
+
20
+
21
+ # NOTE: Special for env
22
+ run_cmd("pip install python-dotenv")
maintab_fast_conformer_10-09_01-54/code-folder/utils/utils.py ADDED
@@ -0,0 +1,283 @@
1
+ import copy
2
+ import csv
3
+ import glob
4
+ import json
5
+ import logging
6
+ import logging as log
7
+ import os
8
+ import random
9
+ import re
10
+ import shutil
11
+ import string
12
+ import sys
13
+ import unicodedata
14
+
15
+ import jiwer
16
+ import lightning.pytorch as pl
17
+ import nemo
18
+ import nemo.collections.asr as nemo_asr
19
+ import numpy as np
20
+ import torch
21
+ from datasets import load_dataset
22
+ from jiwer import wer
23
+ from lightning.pytorch.callbacks import Callback, EarlyStopping, ModelCheckpoint
24
+ from lightning.pytorch.utilities.model_summary import ModelSummary
25
+ from omegaconf import OmegaConf
26
+ from scipy.io import wavfile
27
+
28
+ # * V0 -> this
29
+ from V0_Import.import_src import push_file_to_hub
30
+
31
+ # No need to edit this section, please
32
+
33
+
34
+ class LossLogger(Callback):
35
+ def __init__(self, exp_dir):
36
+ super().__init__()
37
+ self.train_losses = []
38
+ self.val_losses = []
39
+ self.train_wer = []
40
+ self.val_wer = []
41
+ self.num_last = 100  # ? how many recent epochs to plot
42
+ self.num_plot = 100  # ? plot every this many epochs
43
+ self.allow_show_plot = False  # ? show plots inline in a notebook instead of saving
44
+ self.exp_dir = exp_dir
45
+
46
+ def on_train_epoch_end(self, trainer, pl_module):
47
+ log.info(trainer.callback_metrics)
48
+
49
+ train_loss = trainer.callback_metrics.get('train_loss')
50
+ epoch_idx = trainer.current_epoch
51
+ lr = trainer.optimizers[0].param_groups[0]['lr'] # Print lr
52
+ log.info(f"Epoch {epoch_idx} ended." + "=" * 100)
53
+ if train_loss is not None:
54
+ self.train_losses.append(train_loss.item())
55
+ log.info(f"Train Loss: {train_loss.item()}, lr: {lr}")
56
+
57
+ if epoch_idx != 0 and epoch_idx % self.num_plot == 0:
58
+ self._plot_train()
59
+
60
+ def on_validation_epoch_end(self, trainer, pl_module):
61
+ val_loss = trainer.callback_metrics.get('val_loss')
62
+ val_wer = trainer.callback_metrics.get('val_wer')
63
+ if val_loss is not None:
64
+ self.val_losses.append(val_loss.item())
65
+ log.info(f"Validation Loss: {val_loss.item()}")
66
+ if val_wer is not None:
67
+ self.val_wer.append(val_wer.item())
68
+ log.info(f"Validation WER: {val_wer.item()}")
69
+
70
+ def _plot_train(self):
71
+ import matplotlib.pyplot as plt
72
+ plt.figure(figsize=(10, 6))
73
+ plt.subplot(2, 1, 1)
74
+ num = self.num_last
75
+ plt.plot(self.train_losses[-num:], label='Training Loss')
76
+ plt.plot(self.val_losses[-num:], label='Validation Loss')
77
+ plt.xlabel('Epoch')
78
+ plt.ylabel('Loss')
79
+ plt.legend()
80
+ plt.title('Training and Validation Loss')
81
+
82
+ plt.subplot(2, 1, 2)
83
+ plt.plot(self.train_wer[-num:], label='Training WER')
84
+ plt.plot(self.val_wer[-num:], label='Validation WER')
85
+ plt.xlabel('Epoch')
86
+ plt.ylabel('WER')
87
+ plt.legend()
88
+ plt.title('Training and Validation WER')
89
+ plt.tight_layout()
90
+ # allow_show_plot = True # Allow show plot in notebook
91
+ if self.allow_show_plot:
92
+ plt.show()
93
+ else:
94
+ plot_png = os.path.join(
95
+ self.exp_dir, f"training_process_{len(self.val_wer)}.png")
96
+ plt.savefig(plot_png)
97
+ push_file_to_hub(plot_png)
98
+
99
+ def on_train_end(self, trainer, pl_module):
100
+ self.num_last = len(self.val_wer)
101
+ self._plot_train()
102
+
103
+
104
+ config_path = "V2_Run/Fast_conformer_nemo/configs/fast_conformer.yaml" # ? NOTE: Setting
105
+ res_exp_dir = "maintab_fast_conformer" # ? NOTE: Setting
106
+ src_folder = "V2_Run/Fast_conformer_nemo" # ? NOTE: Setting
107
+ os.makedirs(res_exp_dir, exist_ok=True)
108
+ dst_folder = os.path.join(res_exp_dir, "code-folder")
109
+ shutil.copytree(src_folder, dst_folder, dirs_exist_ok=True)
110
+ log.info(f"Copied code to {dst_folder}")
111
+
112
+
113
+ def write_txt_exp_dir(name, var):
114
+ path = os.path.join(res_exp_dir, name)
115
+ with open(path, "w", encoding="utf-8") as f:
116
+ f.write(str(var))  # the with block closes the file automatically
118
+
119
+
120
+ # ==============================================================================
121
+
122
+
123
+ def create_time_callbacks(num_keep, min_stop, max_hour):
124
+ # num_keep = 500
125
+ early_stop_callback = EarlyStopping(
126
+ monitor="val_wer", # Metric to monitor
127
+ mode="min", # Lower is better
128
+ stopping_threshold=min_stop,  # Stop once val_wer falls below this value
129
+ patience=num_keep,  # Epochs without improvement before stopping
130
+ verbose=True
131
+ )
132
+ # Keep top 5 checkpoints based on val_wer
133
+ num_avg = 5
134
+ save_last = False
135
+ checkpoint_callback = ModelCheckpoint(
136
+ dirpath=f"{res_exp_dir}/ckpts", # Dir of ckpts
137
+ filename="epoch{epoch}-valwer{val_wer:.4f}",
138
+ monitor="val_wer",
139
+ mode="min",
140
+ save_top_k=num_avg, # Only keep 5 best
141
+ save_last=save_last, # Also save last epoch: False
142
+ )
143
+ # max_time_training = "00:09:00:00"
144
+ max_time_training = f"00:{max_hour}:05:00" # ! Minutes
145
+ callback_list = [LossLogger(res_exp_dir),
146
+ early_stop_callback, checkpoint_callback]
147
+ return max_time_training, callback_list
148
+
149
+
150
+ def create_new_trainer(epochs, min_stop, max_hour="09"):
151
+ # NOTE: Setting
152
+ max_hour = "09" # ! Must edit when run
153
+ log.info(f"Hour to train is {max_hour}")
154
+ setting = {
155
+ 'num_keep': 500,
156
+ 'precision': 'bf16', # ! Use AMP
157
+ # 'precision': 32,
158
+ 'accumulate_grad_batches': 1,
159
+ 'max_hour': max_hour,
160
+ 'enable_progress_bar': False,  # disable the progress bar to keep logs short
161
+ }
162
+ log.info(f"Precision to train is {setting['precision']}")
163
+ # Batch size = 16 x accumulate_grad_batches
164
+ log.info(
165
+ f"Grad batch size to train is x{setting['accumulate_grad_batches']}")
166
+ # Create callbacks
167
+ max_time_training, callback_list = create_time_callbacks(
168
+ num_keep=setting['num_keep'], min_stop=min_stop, max_hour=max_hour)
169
+ # Training args
170
+ trainer_dict = {
171
+ # Hardware
172
+ 'precision': setting['precision'], # Trade-off
173
+ 'devices': 1,
174
+ 'num_nodes': 1,
175
+ 'accelerator': 'gpu',
176
+ 'strategy': 'auto',  # must stay 'auto': single GPU only
177
+ # Training
178
+ 'max_epochs': epochs,
179
+ 'accumulate_grad_batches': setting['accumulate_grad_batches'],
180
+ 'gradient_clip_val': 0.0,
181
+ # Prediction monitor
182
+ 'log_every_n_steps': 100,  # Logging interval within a training epoch
183
+ 'val_check_interval': 1.0,  # Compute WER once per epoch
184
+ # Miscellaneous
185
+ 'enable_progress_bar': setting['enable_progress_bar'],
186
+ 'num_sanity_val_steps': 0,
187
+ 'check_val_every_n_epoch': 1,
188
+ 'sync_batchnorm': True,
190
+ # If True, enables cudnn benchmarking for speed; keep False for variable-length speech input.
191
+ 'benchmark': False,
191
+ # Saving and callback: New setting for callbacks
192
+ 'enable_checkpointing': True,
193
+ 'max_time': max_time_training,
194
+ 'callbacks': callback_list,
195
+ }
196
+ write_txt_exp_dir("args_trainer.txt", trainer_dict)
197
+ trainer = pl.Trainer(**trainer_dict)
198
+ return trainer
199
+
200
+
201
+ # ==============================================================================
202
+ # Dont need to edit, please
203
+
204
+
205
+ def reload_nemo_from_avg(best_paths, nemo_model):
206
+ w_only = False  # NOTE: weights_only=True raises an error here, so keep it False
207
+ load_strict = False
208
+
209
+ def average_checkpoints(paths):
210
+ avg_state_dict = None
211
+ for path in paths:
212
+ ckpt = torch.load(path, map_location="cpu",
213
+ weights_only=w_only)["state_dict"]
214
+ if avg_state_dict is None:
215
+ avg_state_dict = {k: v.clone() for k, v in ckpt.items()}
216
+ else:
217
+ for k in avg_state_dict:
218
+ # if it's int/bool, leave as-is
219
+ if torch.is_floating_point(avg_state_dict[k]):
220
+ avg_state_dict[k] += ckpt[k]
221
+ for k in avg_state_dict:
222
+ if torch.is_floating_point(avg_state_dict[k]):
223
+ avg_state_dict[k] /= len(paths)
224
+ return avg_state_dict
225
+
226
+ # Average
227
+ log.info(f"\n\nBest paths for AVG(model): {best_paths}")
228
+ avg_weights = average_checkpoints(best_paths)
229
+ # Assign averaged weights to NeMo model
230
+ nemo_model = nemo_model.to("cuda" if torch.cuda.is_available() else "cpu")
231
+ nemo_model.load_state_dict(avg_weights, strict=load_strict)
232
+ return nemo_model, avg_weights
233
+
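+ # Arithmetic check for the averaging above: if two checkpoints hold 0.2 and 0.6
+ # for some key, the result is (0.2 + 0.6) / 2 = 0.4; non-float buffers are
+ # skipped by the is_floating_point guard and keep the first checkpoint's values.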
234
+
235
+ def save_model_to_path(nemo_model, avg_weights, nemo_model_path, avg_ckpt_path):
236
+ torch.save({"state_dict": avg_weights}, avg_ckpt_path)
237
+ nemo_model.save_to(nemo_model_path)
238
+ log.info(f"\n\nSaved avg weights (.ckpt) at {avg_ckpt_path}")
239
+ log.info(f"Saved averaged NeMo model at {nemo_model_path}")
240
+
241
+
242
+ def nemo_inference_for_mfpath(nemo_model, mfpath):
243
+ def save_gen_list(text_list, gt_list, name_list):
244
+ random_name = ''.join(random.choices(string.digits, k=8))
245
+ file_path = f"{random_name}.csv"
246
+ # Place the randomly named file in the experiment dir
247
+ file_path = os.path.join(res_exp_dir, file_path)
248
+ log.info(f"Saved gen at {file_path}")
249
+ # Write it as .csv
250
+ with open(file_path, mode="w", newline="", encoding="utf-8") as f:
251
+ writer = csv.writer(f)
252
+ writer.writerow(["Gen", "GT", "Name"]) # header
253
+ for first, second, name in zip(text_list, gt_list, name_list):
254
+ writer.writerow([first, second, name])
255
+ return file_path
256
+
257
+ with open(mfpath, "r", encoding="utf-8") as fin:
258
+ data = [json.loads(line) for line in fin]
259
+ log.info(f"\n\nLoaded {len(data)} entries from {mfpath}")
260
+
261
+ references = []
262
+ predictions = []
263
+ names = []
264
+ from tqdm import tqdm
265
+ for entry in data:  # limit data here if needed
266
+ ref = entry['text']
267
+ audio_path = entry['audio_filepath']
268
+ with torch.no_grad():
269
+ pred = nemo_model.transcribe(audio_path, verbose=False)[0].text
270
+ # if use_norm:
271
+ # pred = normalize_text_vietnamese(pred)
272
+ references.append(ref)
273
+ predictions.append(pred)
274
+ names.append(os.path.basename(audio_path))
275
+
276
+ # Compute WER
277
+ wer_score = wer(references, predictions)
278
+ log.info(f"WER: {wer_score}")
279
+
280
+ # Save pred
281
+ df_path = save_gen_list(text_list=predictions,
282
+ gt_list=references, name_list=names)
283
+ return wer_score
maintab_fast_conformer_10-09_01-54/conf_model.txt ADDED
@@ -0,0 +1 @@
1
+ {'sample_rate': 16000, 'log_prediction': False, 'ctc_reduction': 'mean_volume', 'skip_nan_grad': False, 'train_ds': {'manifest_filepath': 'maintab_fast_conformer/_train_mf.json', 'sample_rate': 16000, 'batch_size': 16, 'shuffle': True, 'num_workers': 8, 'pin_memory': True, 'max_duration': 32.36, 'min_duration': 0.1, 'is_tarred': False, 'tarred_audio_filepaths': None, 'shuffle_n': 2048, 'bucketing_strategy': 'fully_randomized', 'bucketing_batch_size': None}, 'validation_ds': {'manifest_filepath': 'maintab_fast_conformer/_dev_mf.json', 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}, 'test_ds': {'manifest_filepath': 'V1_Setup/Out/1p/test_ds.json', 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}, 'tokenizer': {'dir': 'V1_Setup/Out/1p/', 'type': 'wpe'}, 'preprocessor': {'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_size': 0.025, 'window_stride': 0.01, 'window': 'hann', 'features': 80, 'n_fft': 512, 'log': True, 'frame_splicing': 1, 'dither': 1e-05, 'pad_to': 0, 'pad_value': 0.0}, 'spec_augment': {'_target_': 'nemo.collections.asr.modules.SpectrogramAugmentation', 'freq_masks': 2, 'time_masks': 2, 'freq_width': 27, 'time_width': 0.05}, 'encoder': {'_target_': 'nemo.collections.asr.modules.ConformerEncoder', 'feat_in': 80, 'feat_out': -1, 'n_layers': 16, 'd_model': 256, 'subsampling': 'dw_striding', 'subsampling_factor': 8, 'subsampling_conv_channels': 256, 'causal_downsampling': False, 'ff_expansion_factor': 4, 'self_attention_model': 'rel_pos', 'n_heads': 4, 'att_context_size': [-1, -1], 'att_context_style': 'regular', 'xscaling': True, 'untie_biases': True, 'pos_emb_max_len': 5000, 'use_pytorch_sdpa': False, 'use_pytorch_sdpa_backends': [], 'conv_kernel_size': 9, 'conv_norm_type': 'batch_norm', 'conv_context_size': None, 'dropout': 0.1, 'dropout_pre_encoder': 0.1, 'dropout_emb': 0.0, 'dropout_att': 0.1, 'stochastic_depth_drop_prob': 0.0, 'stochastic_depth_mode': 'linear', 'stochastic_depth_start_layer': 1}, 'decoder': {'_target_': 'nemo.collections.asr.modules.ConvASRDecoder', 'feat_in': None, 'num_classes': -1, 'vocabulary': []}, 'interctc': {'loss_weights': [], 'apply_at_layers': []}, 'optim': {'name': 'adamw', 'lr': 0.001, 'betas': [0.9, 0.98], 'weight_decay': 0.001, 'sched': {'name': 'CosineAnnealing', 'warmup_steps': 15000, 'warmup_ratio': None, 'min_lr': 0.0001}}}
maintab_fast_conformer_10-09_01-54/git_pip_env.txt ADDED
@@ -0,0 +1,895 @@
1
+ a28ce37 run 1p fast conformer
2
+ Author date: 2025-10-08 22:16:18 +0700
3
+ Commit date: 2025-10-08 22:16:18 +0700
4
+ Python 3.11.13
5
+
6
+ absl-py==1.4.0
7
+ accelerate==1.5.2
8
+ aiofiles==22.1.0
9
+ aiohappyeyeballs==2.6.1
10
+ aiohttp==3.12.13
11
+ aiosignal==1.3.2
12
+ aiosqlite==0.21.0
13
+ alabaster==1.0.0
14
+ albucore==0.0.24
15
+ albumentations==2.0.8
16
+ ale-py==0.11.1
17
+ alembic==1.16.2
18
+ altair==5.5.0
19
+ annotated-types==0.7.0
20
+ annoy==1.17.3
21
+ ansicolors==1.1.8
22
+ antlr4-python3-runtime==4.9.3
23
+ anyio==4.9.0
24
+ argon2-cffi==25.1.0
25
+ argon2-cffi-bindings==21.2.0
26
+ args==0.1.0
27
+ array_record==0.7.2
28
+ arrow==1.3.0
29
+ arviz==0.21.0
30
+ astropy==7.1.0
31
+ astropy-iers-data==0.2025.6.23.0.39.50
32
+ asttokens==3.0.0
33
+ astunparse==1.6.3
34
+ atpublic==5.1
35
+ attrs==25.3.0
36
+ audioread==3.0.1
37
+ autograd==1.8.0
38
+ babel==2.17.0
39
+ backcall==0.2.0
40
+ backports.tarfile==1.2.0
41
+ bayesian-optimization==3.0.0
42
+ beartype==0.21.0
43
+ beautifulsoup4==4.13.4
44
+ betterproto==2.0.0b6
45
+ bigframes==2.8.0
46
+ bigquery-magics==0.9.0
47
+ bitsandbytes==0.46.0
48
+ bleach==6.2.0
49
+ blinker==1.9.0
50
+ blis==1.3.0
51
+ blobfile==3.0.0
52
+ blosc2==3.5.0
53
+ bokeh==3.7.3
54
+ Boruta==0.4.3
55
+ boto3==1.39.1
56
+ botocore==1.39.1
57
+ Bottleneck==1.4.2
58
+ -e git+https://github.com/SohierDane/BigQuery_Helper@8615a7f6c1663e7f2d48aa2b32c2dbcb600a440f#egg=bq_helper
59
+ bqplot==0.12.45
60
+ braceexpand==0.1.7
61
+ branca==0.8.1
62
+ build==1.2.2.post1
63
+ CacheControl==0.14.3
64
+ cachetools==5.5.2
65
+ Cartopy==0.24.1
66
+ catalogue==2.0.10
67
+ catboost==1.2.8
68
+ category_encoders==2.7.0
69
+ certifi==2025.6.15
70
+ cesium==0.12.4
71
+ cffi==1.17.1
72
+ chardet==5.2.0
73
+ charset-normalizer==3.4.2
74
+ Chessnut==0.4.1
75
+ chex==0.1.89
76
+ clarabel==0.11.1
77
+ click==8.2.1
78
+ click-plugins==1.1.1.2
79
+ cligj==0.7.2
80
+ clint==0.5.1
81
+ cloudpathlib==0.21.1
82
+ cloudpickle==3.1.1
83
+ cmake==3.31.6
84
+ cmdstanpy==1.2.5
85
+ colorama==0.4.6
86
+ colorcet==3.1.0
87
+ colorlog==6.9.0
88
+ colorlover==0.3.0
89
+ colour==0.1.5
90
+ comm==0.2.2
91
+ community==1.0.0b1
92
+ confection==0.1.5
93
+ cons==0.4.6
94
+ contourpy==1.3.2
95
+ coverage==7.9.1
96
+ cramjam==2.10.0
97
+ cryptography==44.0.3
98
+ cuda-bindings==12.9.0
99
+ cuda-python==12.9.0
100
+ cudf-cu12==25.2.2
101
+ cudf-polars-cu12==25.2.2
102
+ cufflinks==0.17.3
103
+ cuml-cu12==25.2.1
104
+ cupy-cuda12x==13.4.1
105
+ curl_cffi==0.11.4
106
+ cuvs-cu12==25.2.1
107
+ cvxopt==1.3.2
108
+ cvxpy==1.6.6
109
+ cycler==0.12.1
110
+ cyipopt==1.5.0
111
+ cymem==2.0.11
112
+ Cython==3.0.12
113
+ cytoolz==1.0.1
114
+ daal==2025.6.1
115
+ dacite==1.9.2
116
+ dask==2024.12.1
117
+ dask-cuda==25.2.0
118
+ dask-cudf-cu12==25.2.2
119
+ dask-expr==1.1.21
120
+ dataclasses-json==0.6.7
121
+ dataproc-spark-connect==0.7.5
122
+ datascience==0.17.6
123
+ datasets==3.6.0
124
+ db-dtypes==1.4.3
125
+ dbus-python==1.2.18
126
+ deap==1.4.3
127
+ debugpy==1.8.0
128
+ decorator==4.4.2
129
+ deepdiff==8.5.0
130
+ defusedxml==0.7.1
131
+ Deprecated==1.2.18
132
+ diffusers==0.34.0
133
+ dill==0.3.8
134
+ dipy==1.11.0
135
+ distributed==2024.12.1
136
+ distributed-ucxx-cu12==0.42.0
137
+ distro==1.9.0
138
+ dlib==19.24.6
139
+ dm-tree==0.1.9
140
+ dnspython==2.7.0
141
+ docker==7.1.0
142
+ docopt==0.6.2
143
+ docstring-to-markdown==0.17
144
+ docstring_parser==0.16
145
+ docutils==0.21.2
146
+ dopamine_rl==4.1.2
147
+ duckdb==1.2.2
148
+ earthengine-api==1.5.21
149
+ easydict==1.13
150
+ easyocr==1.7.2
151
+ editdistance==0.8.1
152
+ eerepr==0.1.2
153
+ einops==0.8.1
154
+ eli5==0.13.0
155
+ email_validator==2.2.0
156
+ emoji==2.14.1
157
+ en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
158
+ entrypoints==0.4
159
+ et_xmlfile==2.0.0
160
+ etils==1.12.2
161
+ etuples==0.3.9
162
+ execnb==0.1.14
163
+ Farama-Notifications==0.0.4
164
+ fastai==2.7.19
165
+ fastapi==0.115.13
166
+ fastcore==1.7.29
167
+ fastdownload==0.0.7
168
+ fastjsonschema==2.21.1
169
+ fastprogress==1.0.3
170
+ fastrlock==0.8.3
171
+ fasttext==0.9.3
172
+ featuretools==1.31.0
173
+ ffmpy==0.6.0
174
+ fiddle==0.3.0
175
+ filelock==3.18.0
176
+ fiona==1.10.1
177
+ firebase-admin==6.9.0
178
+ Flask==3.1.1
179
+ flatbuffers==25.2.10
180
+ flax==0.10.6
181
+ folium==0.19.7
182
+ fonttools==4.58.4
183
+ fqdn==1.5.1
184
+ frozendict==2.4.6
185
+ frozenlist==1.7.0
186
+ fsspec==2024.12.0
187
+ funcy==2.0
188
+ fury==0.12.0
189
+ future==1.0.0
190
+ fuzzywuzzy==0.18.0
191
+ gast==0.6.0
192
+ gatspy==0.3
193
+ gcsfs==2025.3.2
194
+ GDAL==3.8.4
195
+ gdown==5.2.0
196
+ geemap==0.35.3
197
+ gensim==4.3.3
198
+ geocoder==1.38.1
199
+ geographiclib==2.0
200
+ geojson==3.2.0
201
+ geopandas==0.14.4
202
+ geopy==2.4.1
203
+ ghapi==1.0.6
204
+ gin-config==0.5.0
205
+ gitdb==4.0.12
206
+ GitPython==3.1.44
207
+ glob2==0.7
208
+ google==2.0.3
209
+ google-ai-generativelanguage==0.6.15
210
+ google-api-core==1.34.1
211
+ google-api-python-client==2.173.0
212
+ google-auth==2.40.3
213
+ google-auth-httplib2==0.2.0
214
+ google-auth-oauthlib==1.2.2
215
+ google-cloud-aiplatform==1.99.0
216
+ google-cloud-automl==1.0.1
217
+ google-cloud-bigquery==3.25.0
218
+ google-cloud-bigquery-connection==1.18.3
219
+ google-cloud-core==2.4.3
220
+ google-cloud-dataproc==5.20.0
221
+ google-cloud-datastore==2.21.0
222
+ google-cloud-firestore==2.21.0
223
+ google-cloud-functions==1.20.4
224
+ google-cloud-iam==2.19.1
225
+ google-cloud-language==2.17.2
226
+ google-cloud-resource-manager==1.14.2
227
+ google-cloud-spanner==3.55.0
228
+ google-cloud-storage==2.19.0
229
+ google-cloud-translate==3.12.1
230
+ google-cloud-videointelligence==2.16.2
231
+ google-cloud-vision==3.10.2
232
+ google-colab @ file:///colabtools/dist/google_colab-1.0.0.tar.gz
233
+ google-crc32c==1.7.1
234
+ google-genai==1.21.1
235
+ google-generativeai==0.8.5
236
+ google-pasta==0.2.0
237
+ google-resumable-media==2.7.2
238
+ googleapis-common-protos==1.70.0
239
+ googledrivedownloader==1.1.0
240
+ gpxpy==1.6.2
241
+ gradio==5.31.0
242
+ gradio_client==1.10.1
243
+ graphviz==0.21
244
+ greenlet==3.2.3
245
+ groovy==0.1.2
246
+ grpc-google-iam-v1==0.14.2
247
+ grpc-interceptor==0.15.4
248
+ grpcio==1.73.1
249
+ grpcio-status==1.49.0rc1
250
+ grpclib==0.4.8
251
+ gspread==6.2.1
252
+ gspread-dataframe==4.0.0
253
+ gym==0.25.2
254
+ gym-notices==0.0.8
255
+ gymnasium==0.29.0
256
+ h11==0.16.0
257
+ h2==4.2.0
258
+ h2o==3.46.0.7
259
+ h5netcdf==1.6.1
260
+ h5py==3.14.0
261
+ haversine==2.9.0
262
+ hdbscan==0.8.40
263
+ hep_ml==0.8.0
264
+ hf-xet==1.1.5
265
+ hf_transfer==0.1.9
266
+ highspy==1.11.0
267
+ holidays==0.75
268
+ holoviews==1.20.2
269
+ hpack==4.1.0
270
+ html5lib==1.1
271
+ htmlmin==0.1.12
272
+ httpcore==1.0.9
273
+ httpimport==1.4.1
274
+ httplib2==0.22.0
275
+ httpx==0.28.1
276
+ huggingface-hub==0.33.1
277
+ humanize==4.12.3
278
+ hydra-core==1.3.2
279
+ hyperframe==6.1.0
280
+ hyperopt==0.2.7
281
+ ibis-framework==9.5.0
282
+ id==1.5.0
283
+ idna==3.10
284
+ igraph==0.11.9
285
+ ImageHash==4.3.1
286
+ imageio==2.37.0
287
+ imageio-ffmpeg==0.6.0
288
+ imagesize==1.4.1
289
+ imbalanced-learn==0.13.0
290
+ immutabledict==4.2.1
291
+ importlib_metadata==8.7.0
292
+ importlib_resources==6.5.2
293
+ imutils==0.5.4
294
+ in-toto-attestation==0.9.3
295
+ indic_numtowords==1.1.0
296
+ inflect==7.5.0
297
+ iniconfig==2.1.0
298
+ intel-cmplr-lib-rt==2024.2.0
299
+ intel-cmplr-lib-ur==2024.2.0
300
+ intel-openmp==2024.2.0
301
+ intervaltree==3.1.0
302
+ ipyevents==2.0.2
303
+ ipyfilechooser==0.6.0
304
+ ipykernel==6.17.1
305
+ ipyleaflet==0.20.0
306
+ ipympl==0.9.7
307
+ ipyparallel==8.8.0
308
+ ipython==7.34.0
309
+ ipython-genutils==0.2.0
310
+ ipython-sql==0.5.0
311
+ ipytree==0.2.2
312
+ ipywidgets==8.1.5
313
+ isoduration==20.11.0
314
+ isoweek==1.3.3
315
+ itsdangerous==2.2.0
316
+ Janome==0.5.0
317
+ jaraco.classes==3.4.0
318
+ jaraco.context==6.0.1
319
+ jaraco.functools==4.2.1
320
+ jax==0.5.2
321
+ jax-cuda12-pjrt==0.5.1
322
+ jax-cuda12-plugin==0.5.1
323
+ jaxlib==0.5.1
324
+ jedi==0.19.2
325
+ jeepney==0.9.0
326
+ jieba==0.42.1
327
+ Jinja2==3.1.6
328
+ jiter==0.10.0
329
+ jiwer==3.1.0
330
+ jmespath==1.0.1
331
+ joblib==1.5.1
332
+ json5==0.12.0
333
+ jsonpatch==1.33
334
+ jsonpickle==4.1.1
335
+ jsonpointer==3.0.0
336
+ jsonschema==4.24.0
337
+ jsonschema-specifications==2025.4.1
338
+ jupyter-console==6.1.0
339
+ jupyter-events==0.12.0
340
+ jupyter-leaflet==0.20.0
341
+ jupyter-lsp==1.5.1
342
+ jupyter-ydoc==0.2.5
343
+ jupyter_client==8.6.3
344
+ jupyter_core==5.8.1
345
+ jupyter_kernel_gateway @ git+https://github.com/googlecolab/kernel_gateway@b134e9945df25c2dcb98ade9129399be10788671
346
+ jupyter_server==2.12.5
347
+ jupyter_server_fileid==0.9.3
348
+ jupyter_server_terminals==0.5.3
349
+ jupyter_server_ydoc==0.8.0
350
+ jupyterlab==3.6.8
351
+ jupyterlab-lsp==3.10.2
352
+ jupyterlab_pygments==0.3.0
353
+ jupyterlab_server==2.27.3
354
+ jupyterlab_widgets==3.0.15
355
+ jupytext==1.17.2
356
+ kaggle==1.7.4.5
357
+ kaggle-environments==1.17.6
358
+ kagglehub==0.3.12
359
+ kaldi-python-io==1.2.2
360
+ keras==3.8.0
361
+ keras-core==0.1.7
362
+ keras-cv==0.9.0
363
+ keras-hub==0.18.1
364
+ keras-nlp==0.18.1
365
+ keras-tuner==1.4.7
366
+ keyring==25.6.0
367
+ keyrings.google-artifactregistry-auth==1.1.2
368
+ kiwisolver==1.4.8
369
+ kornia==0.8.1
370
+ kornia_rs==0.1.9
371
+ kt-legacy==1.0.5
372
+ langchain==0.3.26
373
+ langchain-core==0.3.66
374
+ langchain-text-splitters==0.3.8
375
+ langcodes==3.5.0
376
+ langid==1.1.6
377
+ langsmith==0.4.1
378
+ language_data==1.3.0
379
+ launchpadlib==1.10.16
380
+ lazr.restfulclient==0.14.4
381
+ lazr.uri==1.0.6
382
+ lazy_loader==0.4
383
+ learntools @ git+https://github.com/Kaggle/learntools@003526b6ef9d864326e2a099599de3380b4cc94c
384
+ Levenshtein==0.27.1
385
+ lhotse==1.31.1
386
+ libclang==18.1.1
387
+ libcst==1.8.5
388
+ libcudf-cu12==25.2.2
389
+ libcugraph-cu12==25.2.0
390
+ libcuml-cu12==25.2.1
391
+ libcuvs-cu12==25.2.1
392
+ libkvikio-cu12==25.2.1
393
+ libpysal==4.9.2
394
+ libraft-cu12==25.2.0
395
+ librosa==0.11.0
396
+ libucx-cu12==1.18.1
397
+ libucxx-cu12==0.42.0
398
+ lightgbm @ file:///tmp/lightgbm/LightGBM/dist/lightgbm-4.5.0-py3-none-linux_x86_64.whl
399
+ lightning==2.4.0
400
+ lightning-utilities==0.14.3
401
+ lilcom==1.8.1
402
+ lime==0.2.0.1
403
+ line_profiler==4.2.0
404
+ linkify-it-py==2.0.3
405
+ llvmlite==0.43.0
406
+ lml==0.2.0
407
+ locket==1.0.0
408
+ logical-unification==0.4.6
409
+ loguru==0.7.3
410
+ lxml==5.4.0
411
+ Mako==1.3.10
412
+ mamba==0.11.3
413
+ marisa-trie==1.2.1
414
+ Markdown==3.8.2
415
+ markdown-it-py==3.0.0
416
+ MarkupSafe==3.0.2
417
+ marshmallow==3.26.1
418
+ matplotlib==3.7.2
419
+ matplotlib-inline==0.1.7
420
+ matplotlib-venn==1.1.2
421
+ mdit-py-plugins==0.4.2
422
+ mdurl==0.1.2
423
+ mediapy==1.1.6
424
+ miniKanren==1.0.3
425
+ missingno==0.5.2
426
+ mistune==0.8.4
427
+ mizani==0.13.5
428
+ mkl==2025.2.0
429
+ mkl-fft==1.3.8
430
+ mkl-random==1.2.4
431
+ mkl-service==2.4.1
432
+ mkl-umath==0.1.1
433
+ ml-dtypes==0.4.1
434
+ ml_collections==1.1.0
435
+ mlcrate==0.2.0
436
+ mlxtend==0.23.4
437
+ mne==1.9.0
438
+ model-signing==1.0.1
439
+ more-itertools==10.7.0
440
+ moviepy==1.0.3
441
+ mpld3==0.5.10
442
+ mpmath==1.3.0
443
+ msgpack==1.1.1
444
+ multidict==6.6.3
445
+ multimethod==1.12
446
+ multipledispatch==1.0.0
447
+ multiprocess==0.70.16
448
+ multitasking==0.0.11
449
+ murmurhash==1.0.13
450
+ music21==9.3.0
451
+ mypy_extensions==1.1.0
452
+ namex==0.1.0
453
+ narwhals==1.44.0
454
+ natsort==8.4.0
455
+ nbclassic==1.3.1
456
+ nbclient==0.5.13
457
+ nbconvert==6.4.5
458
+ nbdev==2.3.36
459
+ nbformat==5.10.4
460
+ ndindex==1.10.0
461
+ nemo-toolkit==2.5.0
462
+ nest-asyncio==1.6.0
463
+ networkx==3.5
464
+ nibabel==5.3.2
465
+ nilearn==0.10.4
466
+ ninja==1.11.1.4
467
+ nltk==3.9.1
468
+ notebook==6.5.4
469
+ notebook_shim==0.2.4
470
+ num2words==0.5.14
471
+ numba==0.60.0
472
+ numba-cuda==0.2.0
473
+ numexpr==2.11.0
474
+ numpy==1.26.4
475
+ nvidia-cublas-cu12==12.4.5.8
476
+ nvidia-cuda-cupti-cu12==12.4.127
477
+ nvidia-cuda-nvcc-cu12==12.5.82
478
+ nvidia-cuda-nvrtc-cu12==12.4.127
479
+ nvidia-cuda-runtime-cu12==12.4.127
480
+ nvidia-cudnn-cu12==9.1.0.70
481
+ nvidia-cufft-cu12==11.2.1.3
482
+ nvidia-curand-cu12==10.3.5.147
483
+ nvidia-cusolver-cu12==11.6.1.9
484
+ nvidia-cusparse-cu12==12.3.1.170
485
+ nvidia-cusparselt-cu12==0.6.2
486
+ nvidia-ml-py==12.575.51
487
+ nvidia-nccl-cu12==2.21.5
488
+ nvidia-nvcomp-cu12==4.2.0.11
489
+ nvidia-nvjitlink-cu12==12.4.127
490
+ nvidia-nvtx-cu12==12.4.127
491
+ nvtx==0.2.12
492
+ nx-cugraph-cu12 @ https://pypi.nvidia.com/nx-cugraph-cu12/nx_cugraph_cu12-25.2.0-py3-none-any.whl
493
+ oauth2client==4.1.3
494
+ oauthlib==3.3.1
495
+ odfpy==1.4.1
496
+ olefile==0.47
497
+ omegaconf==2.3.0
498
+ onnx==1.18.0
499
+ open_spiel==1.6
500
+ openai==1.91.0
501
+ opencv-contrib-python==4.11.0.86
502
+ opencv-python==4.11.0.86
503
+ opencv-python-headless==4.11.0.86
504
+ openpyxl==3.1.5
505
+ openslide-bin==4.0.0.8
506
+ openslide-python==1.4.2
507
+ opt_einsum==3.4.0
508
+ optax==0.2.5
509
+ optree==0.16.0
510
+ optuna==4.4.0
511
+ orbax-checkpoint==0.11.16
512
+ orderly-set==5.4.1
513
+ orjson==3.10.18
514
+ osqp==1.0.4
515
+ overrides==7.7.0
516
+ packaging==24.2
517
+ pandas==2.2.3
518
+ pandas-datareader==0.10.0
519
+ pandas-gbq==0.29.1
520
+ pandas-profiling==3.6.6
521
+ pandas-stubs==2.2.2.240909
522
+ pandasql==0.7.3
523
+ pandocfilters==1.5.1
524
+ panel==1.7.1
525
+ papermill==2.6.0
526
+ param==2.2.1
527
+ parso==0.8.4
528
+ parsy==2.1
529
+ partd==1.4.2
530
+ path==17.1.0
531
+ path.py==12.5.0
532
+ pathlib==1.0.1
533
+ pathos==0.3.1
534
+ patsy==1.0.1
535
+ pdf2image==1.17.0
536
+ peewee==3.18.1
537
+ peft==0.15.2
538
+ pettingzoo==1.24.0
539
+ pexpect==4.9.0
540
+ phik==0.12.4
541
+ pickleshare==0.7.5
542
+ pillow==11.2.1
543
+ plac==1.4.5
544
+ platformdirs==4.3.8
545
+ plotly==5.24.1
546
+ plotly-express==0.4.1
547
+ plotnine==0.14.5
548
+ pluggy==1.6.0
549
+ plum-dispatch==2.5.7
550
+ ply==3.11
551
+ polars==1.21.0
552
+ pooch==1.8.2
553
+ portpicker==1.5.2
554
+ pox==0.3.6
555
+ ppft==1.7.7
556
+ preprocessing==0.1.13
557
+ preshed==3.0.10
558
+ prettytable==3.16.0
559
+ proglog==0.1.12
560
+ progressbar2==4.5.0
561
+ prometheus_client==0.22.1
562
+ promise==2.3
563
+ prompt_toolkit==3.0.51
564
+ propcache==0.3.2
565
+ prophet==1.1.7
566
+ proto-plus==1.26.1
567
+ protobuf==5.29.5
568
+ psutil==7.0.0
569
+ psycopg2==2.9.10
570
+ ptyprocess==0.7.0
571
+ pudb==2025.1
572
+ puremagic==1.29
573
+ py-cpuinfo==9.0.0
574
+ py4j==0.10.9.7
575
+ pyaml==25.5.0
576
+ pyannote.core==5.0.0
577
+ pyannote.database==5.1.3
578
+ pyannote.metrics==3.2.1
579
+ PyArabic==0.6.15
580
+ pyarrow==19.0.1
581
+ pyasn1==0.6.1
582
+ pyasn1_modules==0.4.2
583
+ pybind11==2.13.6
584
+ pycairo==1.28.0
585
+ pyclipper==1.3.0.post6
586
+ pycocotools==2.0.10
587
+ pycparser==2.22
588
+ pycryptodome==3.23.0
589
+ pycryptodomex==3.23.0
590
+ pycuda==2025.1.1
591
+ pydantic==2.11.7
592
+ pydantic_core==2.33.2
593
+ pydata-google-auth==1.9.1
594
+ pydegensac==0.1.2
595
+ pydicom==3.0.1
596
+ pydot==3.0.4
597
+ pydotplus==2.0.2
598
+ PyDrive==1.3.1
599
+ PyDrive2==1.21.3
600
+ pydub==0.25.1
601
+ pyemd==1.0.0
602
+ pyerfa==2.0.1.5
603
+ pyexcel-io==0.6.7
604
+ pyexcel-ods==0.6.0
605
+ pygame==2.6.1
606
+ pygit2==1.18.0
607
+ pygltflib==1.16.4
608
+ Pygments==2.19.2
609
+ PyGObject==3.42.0
610
+ PyJWT==2.10.1
611
+ pyLDAvis==3.4.1
612
+ pylibcudf-cu12==25.2.2
613
+ pylibcugraph-cu12==25.2.0
614
+ pylibraft-cu12==25.2.0
615
+ pyloudnorm==0.1.1
616
+ pymc==5.23.0
617
+ pymc3==3.11.4
618
+ pymongo==4.13.2
619
+ Pympler==1.1
620
+ pymystem3==0.2.0
621
+ pynndescent==0.5.13
622
+ pynvjitlink-cu12==0.5.2
623
+ pynvml==12.0.0
624
+ pyogrio==0.11.0
625
+ pyomo==6.9.2
626
+ PyOpenGL==3.1.9
627
+ pyOpenSSL==25.1.0
628
+ pyparsing==3.0.9
629
+ pypdf==5.7.0
630
+ pyperclip==1.9.0
631
+ pyproj==3.7.1
632
+ pyproject_hooks==1.2.0
633
+ pyshp==2.3.1
634
+ PySocks==1.7.1
635
+ pyspark==3.5.1
636
+ pytensor==2.31.4
637
+ pytesseract==0.3.13
638
+ pytest==8.3.5
639
+ python-apt==0.0.0
640
+ python-bidi==0.6.6
641
+ python-box==7.3.2
642
+ python-dateutil==2.9.0.post0
643
+ python-dotenv==1.1.1
644
+ python-json-logger==3.3.0
645
+ python-louvain==0.16
646
+ python-lsp-jsonrpc==1.1.2
647
+ python-lsp-server==1.12.2
648
+ python-multipart==0.0.20
649
+ python-slugify==8.0.4
650
+ python-snappy==0.7.3
651
+ python-utils==3.9.1
652
+ pytools==2025.1.7
653
+ pytorch-ignite==0.5.2
654
+ pytorch-lightning==2.5.1.post0
655
+ pytz==2025.2
656
+ PyUpSet==0.1.1.post7
657
+ pyviz_comms==3.0.6
658
+ PyWavelets==1.8.0
659
+ PyYAML==6.0.2
660
+ pyzmq==24.0.1
661
+ qgrid==1.3.1
662
+ qtconsole==5.6.1
663
+ QtPy==2.4.3
664
+ raft-dask-cu12==25.2.0
665
+ RapidFuzz==3.14.1
666
+ rapids-dask-dependency==25.2.0
667
+ ratelim==0.1.6
668
+ ray==2.47.1
669
+ referencing==0.36.2
670
+ regex==2024.11.6
671
+ requests==2.32.4
672
+ requests-oauthlib==2.0.0
673
+ requests-toolbelt==1.0.0
674
+ requirements-parser==0.9.0
675
+ resampy==0.4.3
676
+ rfc3161-client==1.0.3
677
+ rfc3339-validator==0.1.4
678
+ rfc3986-validator==0.1.1
679
+ rfc8785==0.1.4
680
+ rgf-python==3.12.0
681
+ rich==14.0.0
682
+ rmm-cu12==25.2.0
683
+ roman-numerals-py==3.1.0
684
+ rpds-py==0.25.1
685
+ rpy2==3.5.17
686
+ rsa==4.9.1
687
+ rtree==1.4.0
688
+ ruamel.yaml==0.18.15
689
+ ruamel.yaml.clib==0.2.14
690
+ ruff==0.12.0
691
+ s3fs==0.4.2
692
+ s3transfer==0.13.0
693
+ sacremoses==0.1.1
694
+ safehttpx==0.1.6
695
+ safetensors==0.5.3
696
+ scikit-image==0.25.2
697
+ scikit-learn==1.2.2
698
+ scikit-learn-intelex==2025.6.1
699
+ scikit-multilearn==0.2.0
700
+ scikit-optimize==0.10.2
701
+ scikit-plot==0.3.7
702
+ scikit-surprise==1.1.4
703
+ scipy==1.15.2
704
+ scooby==0.10.1
705
+ scs==3.2.7.post2
706
+ seaborn==0.12.2
707
+ SecretStorage==3.3.3
708
+ securesystemslib==1.3.0
709
+ segment_anything @ git+https://github.com/facebookresearch/segment-anything.git@dca509fe793f601edb92606367a655c15ac00fdf
710
+ semantic-version==2.10.0
711
+ semver==3.0.4
712
+ Send2Trash==1.8.3
713
+ sentence-transformers==4.1.0
714
+ sentencepiece==0.2.0
715
+ sentry-sdk==2.31.0
716
+ setproctitle==1.3.6
717
+ setuptools-scm==8.3.1
718
+ shap==0.44.1
719
+ shapely==2.1.1
720
+ shellingham==1.5.4
721
+ Shimmy==1.3.0
722
+ sigstore==3.6.4
723
+ sigstore-protobuf-specs==0.3.2
724
+ sigstore-rekor-types==0.0.18
725
+ simple-parsing==0.1.7
726
+ simpleitk==2.5.2
727
+ simplejson==3.20.1
728
+ simsimd==6.4.9
729
+ siphash24==1.7
730
+ six==1.17.0
731
+ sklearn-compat==0.1.3
732
+ sklearn-pandas==2.2.0
733
+ slicer==0.0.7
734
+ smart-open==7.1.0
735
+ smmap==5.0.2
736
+ sniffio==1.3.1
737
+ snowballstemmer==3.0.1
738
+ sortedcontainers==2.4.0
739
+ soundfile==0.13.1
740
+ soupsieve==2.7
741
+ sox==1.5.0
742
+ soxr==0.5.0.post1
743
+ spacy==3.8.7
744
+ spacy-legacy==3.0.12
745
+ spacy-loggers==1.0.5
746
+ spanner-graph-notebook==1.1.7
747
+ Sphinx==8.2.3
748
+ sphinx-rtd-theme==0.2.4
749
+ sphinxcontrib-applehelp==2.0.0
750
+ sphinxcontrib-devhelp==2.0.0
751
+ sphinxcontrib-htmlhelp==2.1.0
752
+ sphinxcontrib-jsmath==1.0.1
753
+ sphinxcontrib-qthelp==2.0.0
754
+ sphinxcontrib-serializinghtml==2.0.0
755
+ SQLAlchemy==2.0.41
756
+ sqlglot==25.20.2
757
+ sqlparse==0.5.3
758
+ squarify==0.4.4
759
+ srsly==2.5.1
760
+ stable-baselines3==2.1.0
761
+ stanio==0.5.1
762
+ starlette==0.46.2
763
+ statsmodels==0.14.4
764
+ stopit==1.1.2
765
+ stringzilla==3.12.5
766
+ stumpy==1.13.0
767
+ sympy==1.13.1
768
+ tables==3.10.2
769
+ tabulate==0.9.0
770
+ tbb==2022.2.0
771
+ tbb4py==2022.2.0
772
+ tblib==3.1.0
773
+ tcmlib==1.4.0
774
+ tenacity==8.5.0
775
+ tensorboard==2.18.0
776
+ tensorboard-data-server==0.7.2
777
+ tensorflow==2.18.0
778
+ tensorflow-cloud==0.1.5
779
+ tensorflow-datasets==4.9.9
780
+ tensorflow-hub==0.16.1
781
+ tensorflow-io==0.37.1
782
+ tensorflow-io-gcs-filesystem==0.37.1
783
+ tensorflow-metadata==1.17.2
784
+ tensorflow-probability==0.25.0
785
+ tensorflow-text==2.18.1
786
+ tensorflow_decision_forests==1.11.0
787
+ tensorstore==0.1.74
788
+ termcolor==3.1.0
789
+ terminado==0.18.1
790
+ testpath==0.6.0
791
+ text-unidecode==1.3
792
+ textblob==0.19.0
793
+ texterrors==0.5.1
794
+ texttable==1.7.0
795
+ tf-slim==1.1.0
796
+ tf_keras==2.18.0
797
+ Theano==1.0.5
798
+ Theano-PyMC==1.1.2
799
+ thinc==8.3.6
800
+ threadpoolctl==3.6.0
801
+ tifffile==2025.6.11
802
+ tiktoken==0.9.0
803
+ timm==1.0.15
804
+ tinycss2==1.4.0
805
+ tokenizers==0.21.2
806
+ toml==0.10.2
807
+ tomlkit==0.13.3
808
+ toolz==1.0.0
809
+ torch @ https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
810
+ torchao==0.10.0
811
+ torchaudio @ https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
812
+ torchdata==0.11.0
813
+ torchinfo==1.8.0
814
+ torchmetrics==1.7.3
815
+ torchsummary==1.5.1
816
+ torchtune==0.6.1
817
+ torchvision @ https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp311-cp311-linux_x86_64.whl
818
+ tornado==6.5.1
819
+ TPOT==0.12.1
820
+ tqdm==4.67.1
821
+ traitlets==5.7.1
822
+ traittypes==0.2.1
823
+ transformers==4.53.3
824
+ treelite==4.4.1
825
+ treescope==0.1.9
826
+ triton==3.2.0
827
+ trx-python==0.3
828
+ tsfresh==0.21.0
829
+ tuf==6.0.0
830
+ tweepy==4.15.0
831
+ typeguard==4.4.4
832
+ typer==0.16.0
833
+ types-python-dateutil==2.9.0.20250516
834
+ types-pytz==2025.2.0.20250516
835
+ types-setuptools==80.9.0.20250529
836
+ typing-inspect==0.9.0
837
+ typing-inspection==0.4.1
838
+ typing_extensions==4.14.0
839
+ tzdata==2025.2
840
+ tzlocal==5.3.1
841
+ uc-micro-py==1.0.3
842
+ ucx-py-cu12==0.42.0
843
+ ucxx-cu12==0.42.0
844
+ ujson==5.10.0
845
+ umap-learn==0.5.7
846
+ umf==0.11.0
847
+ update-checker==0.18.0
848
+ uri-template==1.3.0
849
+ uritemplate==4.2.0
850
+ urllib3==2.5.0
851
+ urwid==3.0.2
852
+ urwid_readline==0.15.1
853
+ uvicorn==0.34.3
854
+ vega-datasets==0.9.0
855
+ visions==0.8.1
856
+ vtk==9.3.1
857
+ wadllib==1.3.6
858
+ Wand==0.6.13
859
+ wandb==0.20.1
860
+ wasabi==1.1.3
861
+ watchdog==6.0.0
862
+ wavio==0.0.9
863
+ wcwidth==0.2.13
864
+ weasel==0.4.1
865
+ webcolors==24.11.1
866
+ webdataset==1.0.2
867
+ webencodings==0.5.1
868
+ websocket-client==1.8.0
869
+ websockets==15.0.1
870
+ Werkzeug==3.1.3
871
+ wget==3.2
872
+ whisper_normalizer==0.1.12
873
+ widgetsnbextension==4.0.14
874
+ woodwork==0.31.0
875
+ wordcloud==1.9.4
876
+ wrapt==1.17.2
877
+ wurlitzer==3.1.1
878
+ xarray==2025.3.1
879
+ xarray-einstats==0.9.1
880
+ xgboost==2.0.3
881
+ xlrd==2.0.2
882
+ xvfbwrapper==0.2.13
883
+ xxhash==3.5.0
884
+ xyzservices==2025.4.0
885
+ y-py==0.6.2
886
+ yarl==1.20.1
887
+ ydata-profiling==4.16.1
888
+ ydf==0.9.0
889
+ yellowbrick==1.5
890
+ yfinance==0.2.63
891
+ ypy-websocket==0.8.4
892
+ zict==3.0.0
893
+ zipp==3.23.0
894
+ zstandard==0.23.0
895
+
maintab_fast_conformer_10-09_01-54/model_avg.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25cafd383db36a0454a708f0b0555a5eb6afcb23e96f3adc5692787482c5c357
3
+ size 111337722
maintab_fast_conformer_10-09_01-54/model_mp.txt ADDED
@@ -0,0 +1,15 @@
1
+ | Name | Type | Params | Mode
2
+ --------------------------------------------------------------------------------
3
+ 0 | preprocessor | AudioToMelSpectrogramPreprocessor | 0 | train
4
+ 1 | encoder | ConformerEncoder | 26.1 M | train
5
+ 2 | decoder | ConvASRDecoder | 1.6 M | train
6
+ 3 | loss | CTCLoss | 0 | train
7
+ 4 | spec_augmentation | SpectrogramAugmentation | 0 | train
8
+ 5 | wer | WER | 0 | train
9
+ --------------------------------------------------------------------------------
10
+ 27.8 M Trainable params
11
+ 0 Non-trainable params
12
+ 27.8 M Total params
13
+ 111.012 Total estimated model params size (MB)
14
+ 471 Modules in train mode
15
+ 0 Modules in eval mode
maintab_fast_conformer_10-09_01-54/nemo_model_avg.nemo ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30ac7e57a46f8d38407a755f6c1b28b6414914374dbde26538d025677016842c
3
+ size 111636480
maintab_fast_conformer_10-09_01-54/training_process_100.png ADDED
maintab_fast_conformer_10-09_01-54/training_process_112.png ADDED
maintab_fast_conformer_10-09_01-54/training_process_42.png ADDED