Update model_config.yaml
Browse files
- model_config.yaml +7 -7
model_config.yaml
CHANGED
|
@@ -80,7 +80,7 @@ reduce_amax: true
|
|
| 80 |
use_emha: false
|
| 81 |
optim:
|
| 82 |
name: distributed_fused_adam
|
| 83 |
-
lr: 3.
|
| 84 |
weight_decay: 0.1
|
| 85 |
betas:
|
| 86 |
- 0.9
|
|
@@ -110,7 +110,7 @@ data:
|
|
| 110 |
num_workers: 2
|
| 111 |
dataloader_type: single
|
| 112 |
train_ds:
|
| 113 |
-
file_path: /dataset/
|
| 114 |
global_batch_size: 128
|
| 115 |
micro_batch_size: 1
|
| 116 |
shuffle: true
|
|
@@ -153,7 +153,7 @@ data:
|
|
| 153 |
hf_dataset: true
|
| 154 |
truncation_method: right
|
| 155 |
validation_ds:
|
| 156 |
-
file_path: /dataset/
|
| 157 |
names: null
|
| 158 |
global_batch_size: 128
|
| 159 |
micro_batch_size: 1
|
|
@@ -238,13 +238,13 @@ data:
|
|
| 238 |
index_mapping_dir: /indexmap_dir
|
| 239 |
data_prefix:
|
| 240 |
train:
|
| 241 |
-
- /datasets/
|
| 242 |
validation:
|
| 243 |
-
- /datasets/
|
| 244 |
test:
|
| 245 |
-
- /datasets/
|
| 246 |
answer_only_loss: true
|
| 247 |
-
restore_from_path: /models/
|
| 248 |
save_nemo_on_validation_end: true
|
| 249 |
use_flash_attention: null
|
| 250 |
pipeline_model_parallel_split_rank: 0
|
|
|
|
| 80 |
use_emha: false
|
| 81 |
optim:
|
| 82 |
name: distributed_fused_adam
|
| 83 |
+
lr: 3.001e-07
|
| 84 |
weight_decay: 0.1
|
| 85 |
betas:
|
| 86 |
- 0.9
|
|
|
|
| 110 |
num_workers: 2
|
| 111 |
dataloader_type: single
|
| 112 |
train_ds:
|
| 113 |
+
file_path: /dataset/train.jsonl
|
| 114 |
global_batch_size: 128
|
| 115 |
micro_batch_size: 1
|
| 116 |
shuffle: true
|
|
|
|
| 153 |
hf_dataset: true
|
| 154 |
truncation_method: right
|
| 155 |
validation_ds:
|
| 156 |
+
file_path: /dataset/val.jsonl
|
| 157 |
names: null
|
| 158 |
global_batch_size: 128
|
| 159 |
micro_batch_size: 1
|
|
|
|
| 238 |
index_mapping_dir: /indexmap_dir
|
| 239 |
data_prefix:
|
| 240 |
train:
|
| 241 |
+
- /datasets/train.jsonl
|
| 242 |
validation:
|
| 243 |
+
- /datasets/val.jsonl
|
| 244 |
test:
|
| 245 |
+
- /datasets/val.jsonl
|
| 246 |
answer_only_loss: true
|
| 247 |
+
restore_from_path: /models/340B_base
|
| 248 |
save_nemo_on_validation_end: true
|
| 249 |
use_flash_attention: null
|
| 250 |
pipeline_model_parallel_split_rank: 0
|