abarbosa committed (verified)
Commit 6364f72 · Parent(s): 3b16f10

Pushing fine-tuned model to Hugging Face Hub
README.md ADDED
@@ -0,0 +1,45 @@
+
+ ---
+ language:
+ - pt
+ - en
+ tags:
+ - aes
+ datasets:
+ - kamel-usp/aes_enem_dataset
+ base_model: meta-llama/Llama-3.1-8B
+ metrics:
+ - accuracy
+ - qwk
+ library_name: peft
+ model-index:
+ - name: llama31_8b-balanced-C5
+   results:
+   - task:
+       type: text-classification
+       name: Automated Essay Score
+     dataset:
+       name: Automated Essay Score ENEM Dataset
+       type: kamel-usp/aes_enem_dataset
+       config: JBCS2025
+       split: test
+     metrics:
+     - name: Macro F1
+       type: F1
+       value: 0.2873977873977874
+     - name: QWK
+       type: qwk
+       value: 0.400524367674275
+ ---
+ # Model ID: llama31_8b-balanced-C5
+ ## Results
+ | Metric           |   test_data |
+ |:-----------------|------------:|
+ | eval_accuracy    |    0.289855 |
+ | eval_RMSE        |   62.6006   |
+ | eval_QWK         |    0.400524 |
+ | eval_Macro_F1    |    0.287398 |
+ | eval_Micro_F1    |    0.289855 |
+ | eval_Weighted_F1 |    0.279631 |
+ | eval_HDIV       |    0.101449 |
+
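For context, a minimal sketch (not the authors' code) of how a PEFT adapter like this one is typically loaded for inference. Assumptions are marked in comments: the adapter repo id is a placeholder, while `num_labels=6` and the grade mapping follow the training log in this commit.

```python
# Minimal sketch: loading the LoRA adapter for inference with peft.
# ASSUMPTION: ADAPTER_ID below is a placeholder for wherever this adapter
# was actually pushed; num_labels=6 and the 0..200 grade scale come from
# the run_experiment.log in this commit.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel

BASE_ID = "meta-llama/Llama-3.1-8B"
ADAPTER_ID = "your-user/llama31_8b-balanced-C5"  # placeholder repo id

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForSequenceClassification.from_pretrained(
    BASE_ID, num_labels=6, torch_dtype=torch.bfloat16
)
model = PeftModel.from_pretrained(base, ADAPTER_ID)
model.eval()

# Batch size 1 avoids needing a pad token for LlamaForSequenceClassification.
inputs = tokenizer("Texto da redação...", return_tensors="pt")
with torch.no_grad():
    pred = model(**inputs).logits.argmax(dim=-1).item()
grade = pred * 40  # label ids 0..5 map to ENEM grades 0, 40, ..., 200
```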
adapter_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-3.1-8B",
+   "bias": "none",
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_bias": false,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": [
+     "classifier",
+     "score"
+   ],
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "gate_proj",
+     "o_proj",
+     "q_proj",
+     "up_proj",
+     "down_proj",
+     "k_proj",
+     "v_proj"
+   ],
+   "task_type": "SEQ_CLS",
+   "use_dora": false,
+   "use_rslora": false
+ }
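The JSON above maps one-to-one onto a `peft` `LoraConfig`. A hedged sketch of an equivalent config in code (illustrative, not the exact training script; note the log passes `lora_target_modules: all-linear`, which peft expanded here to the seven linear projections listed in `target_modules`):

```python
# Sketch: a LoraConfig equivalent to the adapter_config.json above.
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,   # sequence classification head
    r=8,                          # LoRA rank
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    modules_to_save=["classifier", "score"],  # train the new head fully
)
```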
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ac810144d9e5bf0f4fa28cd8d420deb83d1fdb0e0bf95b494d360c33af0e182
+ size 83994544
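The weights themselves live in Git LFS; the pointer's `oid` is the SHA-256 of the file contents and `size` its byte count. A small sketch for verifying a downloaded copy against the pointer (the local path is illustrative):

```python
# Sketch: check a downloaded adapter_model.safetensors against the
# git-lfs pointer above (oid = SHA-256 of the raw file contents).
import hashlib
import os

EXPECTED_OID = "8ac810144d9e5bf0f4fa28cd8d420deb83d1fdb0e0bf95b494d360c33af0e182"
EXPECTED_SIZE = 83994544
path = "adapter_model.safetensors"  # illustrative local path

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
```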
run_experiment.log ADDED
@@ -0,0 +1,1124 @@
+ [2025-03-17 22:47:57,195][__main__][INFO] - cache_dir: /media/data/tmp
+ dataset:
+   name: kamel-usp/aes_enem_dataset
+   split: JBCS2025
+ training_params:
+   seed: 42
+   num_train_epochs: 20
+   logging_steps: 100
+   metric_for_best_model: QWK
+   bf16: true
+ post_training_results:
+   model_path: /workspace/jbcs2025/outputs/2025-03-17/18-44-41
+ experiments:
+   model:
+     name: meta-llama/Llama-3.1-8B
+     type: llama31_classification_lora
+     num_labels: 6
+     output_dir: ./results/llama31_8b-balanced/C5
+     logging_dir: ./logs/llama31_8b-balanced/C5
+     best_model_dir: ./results/llama31_8b-balanced/C5/best_model
+     lora_r: 8
+     lora_dropout: 0.05
+     lora_alpha: 16
+     lora_target_modules: all-linear
+   dataset:
+     grade_index: 4
+   training_id: llama31_8b-balanced-C5
+   training_params:
+     weight_decay: 0.01
+     warmup_ratio: 0.1
+     learning_rate: 5.0e-05
+     train_batch_size: 1
+     eval_batch_size: 2
+     gradient_accumulation_steps: 16
+     gradient_checkpointing: false
+
+ [2025-03-17 22:47:57,197][__main__][INFO] - Starting the Fine Tuning training process.
+ [2025-03-17 22:48:03,239][transformers.tokenization_utils_base][INFO] - loading file tokenizer.json from cache at /media/data/tmp/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/tokenizer.json
+ [2025-03-17 22:48:03,239][transformers.tokenization_utils_base][INFO] - loading file tokenizer.model from cache at None
+ [2025-03-17 22:48:03,239][transformers.tokenization_utils_base][INFO] - loading file added_tokens.json from cache at None
+ [2025-03-17 22:48:03,239][transformers.tokenization_utils_base][INFO] - loading file special_tokens_map.json from cache at /media/data/tmp/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/special_tokens_map.json
+ [2025-03-17 22:48:03,239][transformers.tokenization_utils_base][INFO] - loading file tokenizer_config.json from cache at /media/data/tmp/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/tokenizer_config.json
+ [2025-03-17 22:48:03,239][transformers.tokenization_utils_base][INFO] - loading file chat_template.jinja from cache at None
+ [2025-03-17 22:48:03,637][transformers.tokenization_utils_base][INFO] - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+ [2025-03-17 22:48:03,735][__main__][INFO] - Tokenizer function parameters- Padding:longest; Truncation: False
+ [2025-03-17 22:48:05,848][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /media/data/tmp/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-17 22:48:05,849][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "_name_or_path": "meta-llama/Llama-3.1-8B",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "id2label": {
+     "0": 0,
+     "1": 40,
+     "2": 80,
+     "3": 120,
+     "4": 160,
+     "5": 200
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "label2id": {
+     "0": 0,
+     "40": 1,
+     "80": 2,
+     "120": 3,
+     "160": 4,
+     "200": 5
+   },
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-17 22:48:05,879][transformers.modeling_utils][INFO] - loading weights file model.safetensors from cache at /media/data/tmp/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/model.safetensors.index.json
+ [2025-03-17 22:48:05,880][transformers.modeling_utils][INFO] - Will use torch_dtype=torch.bfloat16 as defined in model's config object
+ [2025-03-17 22:48:05,880][transformers.modeling_utils][INFO] - Instantiating LlamaForSequenceClassification model under default dtype torch.bfloat16.
+ [2025-03-17 22:48:27,762][transformers.modeling_utils][INFO] - Some weights of the model checkpoint at meta-llama/Llama-3.1-8B were not used when initializing LlamaForSequenceClassification: {'lm_head.weight'}
+ - This IS expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
+ - This IS NOT expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
+ [2025-03-17 22:48:27,762][transformers.modeling_utils][WARNING] - Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.1-8B and are newly initialized: ['score.weight']
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+ [2025-03-17 22:48:30,014][__main__][INFO] - None
+ [2025-03-17 22:48:30,015][transformers.training_args][INFO] - PyTorch: setting up devices
+ [2025-03-17 22:48:30,024][__main__][INFO] - Total steps: 620. Number of warmup steps: 62
+ [2025-03-17 22:48:30,031][transformers.trainer][INFO] - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
+ [2025-03-17 22:48:30,056][transformers.trainer][INFO] - Using auto half precision backend
+ [2025-03-17 22:48:30,057][transformers.trainer][WARNING] - No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
+ [2025-03-17 22:48:30,104][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-17 22:48:30,128][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-17 22:48:30,128][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-17 22:48:30,129][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-17 22:49:44,277][transformers][INFO] - {'accuracy': 0.24242424242424243, 'RMSE': 64.8541487189571, 'QWK': 0.03236437383809365, 'HDIV': 0.12878787878787878, 'Macro_F1': 0.09051808406647116, 'Micro_F1': 0.24242424242424243, 'Weighted_F1': 0.13990935750466543}
+ [2025-03-17 22:49:44,505][transformers.trainer][INFO] - The following columns in the training set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-17 22:49:44,569][transformers.trainer][INFO] - ***** Running training *****
+ [2025-03-17 22:49:44,569][transformers.trainer][INFO] - Num examples = 500
+ [2025-03-17 22:49:44,569][transformers.trainer][INFO] - Num Epochs = 20
+ [2025-03-17 22:49:44,569][transformers.trainer][INFO] - Instantaneous batch size per device = 1
+ [2025-03-17 22:49:44,569][transformers.trainer][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 16
+ [2025-03-17 22:49:44,569][transformers.trainer][INFO] - Gradient Accumulation steps = 16
+ [2025-03-17 22:49:44,569][transformers.trainer][INFO] - Total optimization steps = 620
+ [2025-03-17 22:49:44,574][transformers.trainer][INFO] - Number of trainable parameters = 20,996,096
+ [2025-03-17 23:06:28,642][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-17 23:06:28,646][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-17 23:06:28,646][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-17 23:06:28,646][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-17 23:07:44,336][transformers][INFO] - {'accuracy': 0.25, 'RMSE': 75.3979342213597, 'QWK': -0.16499811817839682, 'HDIV': 0.23484848484848486, 'Macro_F1': 0.09090554254488681, 'Micro_F1': 0.25, 'Weighted_F1': 0.14577389823291462}
+ [2025-03-17 23:07:44,341][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-32
+ [2025-03-17 23:07:44,833][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-17 23:07:44,834][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-17 23:24:32,338][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-17 23:24:32,341][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-17 23:24:32,341][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-17 23:24:32,342][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-17 23:25:47,988][transformers][INFO] - {'accuracy': 0.23484848484848486, 'RMSE': 66.14950926316519, 'QWK': 0.03725553580087271, 'HDIV': 0.12878787878787878, 'Macro_F1': 0.09814814814814815, 'Micro_F1': 0.23484848484848486, 'Weighted_F1': 0.1525252525252525}
+ [2025-03-17 23:25:47,991][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-64
+ [2025-03-17 23:25:48,457][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-17 23:25:48,457][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-17 23:25:49,958][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-32] due to args.save_total_limit
+ [2025-03-17 23:42:33,990][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-17 23:42:33,992][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-17 23:42:33,992][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-17 23:42:33,993][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-17 23:43:49,537][transformers][INFO] - {'accuracy': 0.16666666666666666, 'RMSE': 88.00826407477052, 'QWK': -0.08383018092105265, 'HDIV': 0.303030303030303, 'Macro_F1': 0.11598793363499245, 'Micro_F1': 0.16666666666666666, 'Weighted_F1': 0.13846153846153844}
+ [2025-03-17 23:43:49,540][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-96
+ [2025-03-17 23:43:50,287][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-17 23:43:50,288][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 00:00:35,785][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 00:00:35,787][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 00:00:35,787][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 00:00:35,787][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 00:01:51,406][transformers][INFO] - {'accuracy': 0.2727272727272727, 'RMSE': 65.22687678055308, 'QWK': 0.2237115474834126, 'HDIV': 0.1515151515151515, 'Macro_F1': 0.17972524968423245, 'Micro_F1': 0.2727272727272727, 'Weighted_F1': 0.2201439213416244}
+ [2025-03-18 00:01:51,409][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-128
+ [2025-03-18 00:01:51,843][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 00:01:51,844][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 00:01:53,684][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-64] due to args.save_total_limit
+ [2025-03-18 00:01:53,710][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-96] due to args.save_total_limit
+ [2025-03-18 00:18:38,011][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 00:18:38,014][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 00:18:38,014][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 00:18:38,014][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 00:19:53,683][transformers][INFO] - {'accuracy': 0.21212121212121213, 'RMSE': 70.58199615920608, 'QWK': 0.047742750824966707, 'HDIV': 0.1515151515151515, 'Macro_F1': 0.13397158397158396, 'Micro_F1': 0.21212121212121213, 'Weighted_F1': 0.17766879585061401}
+ [2025-03-18 00:19:53,686][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-160
+ [2025-03-18 00:19:54,138][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 00:19:54,138][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 00:36:40,307][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 00:36:40,310][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 00:36:40,311][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 00:36:40,311][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 00:37:55,888][transformers][INFO] - {'accuracy': 0.24242424242424243, 'RMSE': 65.59748701764651, 'QWK': 0.20500814332247552, 'HDIV': 0.11363636363636365, 'Macro_F1': 0.19603237333577708, 'Micro_F1': 0.24242424242424243, 'Weighted_F1': 0.24172658841086073}
+ [2025-03-18 00:37:55,892][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-192
+ [2025-03-18 00:37:56,578][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 00:37:56,579][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 00:37:58,359][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-160] due to args.save_total_limit
+ [2025-03-18 00:54:43,001][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 00:54:43,004][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 00:54:43,004][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 00:54:43,004][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 00:55:58,683][transformers][INFO] - {'accuracy': 0.3333333333333333, 'RMSE': 61.20011883529758, 'QWK': 0.3801592608352077, 'HDIV': 0.09090909090909094, 'Macro_F1': 0.29144402966866734, 'Micro_F1': 0.3333333333333333, 'Weighted_F1': 0.32806062697367044}
+ [2025-03-18 00:55:58,685][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-224
+ [2025-03-18 00:56:00,042][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 00:56:00,043][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 00:56:01,976][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-128] due to args.save_total_limit
+ [2025-03-18 00:56:01,996][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-192] due to args.save_total_limit
+ [2025-03-18 01:12:47,695][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 01:12:47,698][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 01:12:47,699][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 01:12:47,699][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 01:14:03,433][transformers][INFO] - {'accuracy': 0.1893939393939394, 'RMSE': 76.8311805390904, 'QWK': 0.05107463391591882, 'HDIV': 0.21969696969696972, 'Macro_F1': 0.13419994690332612, 'Micro_F1': 0.1893939393939394, 'Weighted_F1': 0.16065685872262273}
+ [2025-03-18 01:14:03,436][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-256
+ [2025-03-18 01:14:04,398][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 01:14:04,399][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 01:30:51,890][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 01:30:51,893][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 01:30:51,893][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 01:30:51,893][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 01:32:07,555][transformers][INFO] - {'accuracy': 0.32575757575757575, 'RMSE': 62.1825270205921, 'QWK': 0.36911183027687877, 'HDIV': 0.11363636363636365, 'Macro_F1': 0.22144078750334958, 'Micro_F1': 0.32575757575757575, 'Weighted_F1': 0.27548695925061367}
+ [2025-03-18 01:32:07,559][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-288
+ [2025-03-18 01:32:08,212][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 01:32:08,213][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 01:32:09,335][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-256] due to args.save_total_limit
+ [2025-03-18 01:48:53,732][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 01:48:53,737][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 01:48:53,737][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 01:48:53,737][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 01:50:09,451][transformers][INFO] - {'accuracy': 0.30303030303030304, 'RMSE': 64.10219114110616, 'QWK': 0.2570233114166167, 'HDIV': 0.13636363636363635, 'Macro_F1': 0.22320469355717623, 'Micro_F1': 0.30303030303030304, 'Weighted_F1': 0.27005319566718955}
+ [2025-03-18 01:50:09,454][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-320
+ [2025-03-18 01:50:10,446][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 01:50:10,447][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 01:50:12,396][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-288] due to args.save_total_limit
+ [2025-03-18 02:06:58,515][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 02:06:58,518][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 02:06:58,518][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 02:06:58,518][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 02:08:14,181][transformers][INFO] - {'accuracy': 0.3181818181818182, 'RMSE': 60.0, 'QWK': 0.3805069211807093, 'HDIV': 0.10606060606060608, 'Macro_F1': 0.22958520739630187, 'Micro_F1': 0.3181818181818182, 'Weighted_F1': 0.28138657943755396}
+ [2025-03-18 02:08:14,185][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-352
+ [2025-03-18 02:08:14,930][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 02:08:14,931][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 02:08:16,742][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-224] due to args.save_total_limit
+ [2025-03-18 02:08:16,766][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-320] due to args.save_total_limit
+ [2025-03-18 02:25:01,393][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 02:25:01,396][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 02:25:01,396][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 02:25:01,396][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 02:26:16,975][transformers][INFO] - {'accuracy': 0.26515151515151514, 'RMSE': 60.201681240995455, 'QWK': 0.39525619024270653, 'HDIV': 0.08333333333333337, 'Macro_F1': 0.24763387289624172, 'Micro_F1': 0.26515151515151514, 'Weighted_F1': 0.25899349085256135}
+ [2025-03-18 02:26:16,979][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-384
+ [2025-03-18 02:26:17,987][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 02:26:17,988][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 02:26:19,080][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-352] due to args.save_total_limit
+ [2025-03-18 02:43:05,751][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 02:43:05,754][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 02:43:05,754][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 02:43:05,754][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 02:44:21,436][transformers][INFO] - {'accuracy': 0.2878787878787879, 'RMSE': 58.981250230796896, 'QWK': 0.38916478555304734, 'HDIV': 0.0757575757575758, 'Macro_F1': 0.23379159344505882, 'Micro_F1': 0.2878787878787879, 'Weighted_F1': 0.23514724849108284}
+ [2025-03-18 02:44:21,440][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-416
+ [2025-03-18 02:44:22,161][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 02:44:22,161][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 03:01:10,574][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 03:01:10,578][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 03:01:10,578][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 03:01:10,578][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 03:02:26,292][transformers][INFO] - {'accuracy': 0.30303030303030304, 'RMSE': 66.51497894641795, 'QWK': 0.34061422236820504, 'HDIV': 0.13636363636363635, 'Macro_F1': 0.22355989731978676, 'Micro_F1': 0.30303030303030304, 'Weighted_F1': 0.2666766786880561}
+ [2025-03-18 03:02:26,296][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-448
+ [2025-03-18 03:02:27,111][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 03:02:27,112][transformers.configuration_utils][INFO] - Model config LlamaConfig {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 128000,
+   "eos_token_id": 128001,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": {
+     "factor": 8.0,
+     "high_freq_factor": 4.0,
+     "low_freq_factor": 1.0,
+     "original_max_position_embeddings": 8192,
+     "rope_type": "llama3"
+   },
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": true,
+   "vocab_size": 128256
+ }
+
+ [2025-03-18 03:02:28,990][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-416] due to args.save_total_limit
757
+ [2025-03-18 03:19:16,010][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
758
+ [2025-03-18 03:19:16,013][transformers.trainer][INFO] -
759
+ ***** Running Evaluation *****
760
+ [2025-03-18 03:19:16,013][transformers.trainer][INFO] - Num examples = 132
761
+ [2025-03-18 03:19:16,013][transformers.trainer][INFO] - Batch size = 2
762
+ [2025-03-18 03:20:31,669][transformers][INFO] - {'accuracy': 0.32575757575757575, 'RMSE': 59.39084716749482, 'QWK': 0.4298860128235573, 'HDIV': 0.08333333333333337, 'Macro_F1': 0.26813891813891816, 'Micro_F1': 0.32575757575757575, 'Weighted_F1': 0.31763971763971766}
763
+ [2025-03-18 03:20:31,673][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-480
764
+ [2025-03-18 03:20:32,623][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
765
+ [2025-03-18 03:20:32,624][transformers.configuration_utils][INFO] - Model config LlamaConfig {
766
+ "architectures": [
767
+ "LlamaForCausalLM"
768
+ ],
769
+ "attention_bias": false,
770
+ "attention_dropout": 0.0,
771
+ "bos_token_id": 128000,
772
+ "eos_token_id": 128001,
773
+ "head_dim": 128,
774
+ "hidden_act": "silu",
775
+ "hidden_size": 4096,
776
+ "initializer_range": 0.02,
777
+ "intermediate_size": 14336,
778
+ "max_position_embeddings": 131072,
779
+ "mlp_bias": false,
780
+ "model_type": "llama",
781
+ "num_attention_heads": 32,
782
+ "num_hidden_layers": 32,
783
+ "num_key_value_heads": 8,
784
+ "pretraining_tp": 1,
785
+ "rms_norm_eps": 1e-05,
786
+ "rope_scaling": {
787
+ "factor": 8.0,
788
+ "high_freq_factor": 4.0,
789
+ "low_freq_factor": 1.0,
790
+ "original_max_position_embeddings": 8192,
791
+ "rope_type": "llama3"
792
+ },
793
+ "rope_theta": 500000.0,
794
+ "tie_word_embeddings": false,
795
+ "torch_dtype": "bfloat16",
796
+ "transformers_version": "4.49.0",
797
+ "use_cache": true,
798
+ "vocab_size": 128256
799
+ }
800
+
801
+ [2025-03-18 03:20:34,492][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-384] due to args.save_total_limit
802
+ [2025-03-18 03:20:34,513][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-448] due to args.save_total_limit
803
+ [2025-03-18 03:37:21,087][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
804
+ [2025-03-18 03:37:21,090][transformers.trainer][INFO] -
805
+ ***** Running Evaluation *****
806
+ [2025-03-18 03:37:21,090][transformers.trainer][INFO] - Num examples = 132
807
+ [2025-03-18 03:37:21,090][transformers.trainer][INFO] - Batch size = 2
808
+ [2025-03-18 03:38:36,834][transformers][INFO] - {'accuracy': 0.2878787878787879, 'RMSE': 58.15340215170718, 'QWK': 0.42696209622207004, 'HDIV': 0.05303030303030298, 'Macro_F1': 0.20220652686992957, 'Micro_F1': 0.2878787878787879, 'Weighted_F1': 0.23529808964325488}
809
+ [2025-03-18 03:38:36,837][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-512
810
+ [2025-03-18 03:38:37,400][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
811
+ [2025-03-18 03:55:25,879][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 03:55:25,882][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 03:55:25,883][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 03:55:25,883][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 03:56:41,664][transformers][INFO] - {'accuracy': 0.2727272727272727, 'RMSE': 59.797638546664835, 'QWK': 0.41984505363528, 'HDIV': 0.08333333333333337, 'Macro_F1': 0.2372466413177874, 'Micro_F1': 0.2727272727272727, 'Weighted_F1': 0.2692383741516407}
+ [2025-03-18 03:56:41,668][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-544
+ [2025-03-18 03:56:42,430][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 03:56:44,288][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-512] due to args.save_total_limit
+ [2025-03-18 04:13:31,366][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 04:13:31,371][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 04:13:31,371][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 04:13:31,371][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 04:14:47,009][transformers][INFO] - {'accuracy': 0.2727272727272727, 'RMSE': 60.0, 'QWK': 0.3971953994710622, 'HDIV': 0.06818181818181823, 'Macro_F1': 0.2294526863492381, 'Micro_F1': 0.2727272727272727, 'Weighted_F1': 0.2691151703690889}
+ [2025-03-18 04:14:47,012][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-576
+ [2025-03-18 04:14:47,483][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 04:14:48,675][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-544] due to args.save_total_limit
+ [2025-03-18 04:31:35,446][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 04:31:35,449][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 04:31:35,449][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 04:31:35,449][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 04:32:51,159][transformers][INFO] - {'accuracy': 0.3181818181818182, 'RMSE': 58.981250230796896, 'QWK': 0.4287696019300361, 'HDIV': 0.0757575757575758, 'Macro_F1': 0.2680758090649689, 'Micro_F1': 0.3181818181818182, 'Weighted_F1': 0.31353410215198835}
+ [2025-03-18 04:32:51,162][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-608
+ [2025-03-18 04:32:51,600][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 04:32:53,465][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-576] due to args.save_total_limit
+ [2025-03-18 04:39:20,353][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-620
+ [2025-03-18 04:39:20,897][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 04:39:22,802][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-608] due to args.save_total_limit
+ [2025-03-18 04:39:22,821][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 04:39:22,824][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 04:39:22,824][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 04:39:22,824][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 04:40:38,446][transformers][INFO] - {'accuracy': 0.3409090909090909, 'RMSE': 57.94459213400123, 'QWK': 0.4536489151873767, 'HDIV': 0.08333333333333337, 'Macro_F1': 0.28543976348854394, 'Micro_F1': 0.3409090909090909, 'Weighted_F1': 0.3336524894174562}
+ [2025-03-18 04:40:38,449][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-620
+ [2025-03-18 04:40:39,117][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 04:40:41,068][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-480] due to args.save_total_limit
+ [2025-03-18 04:40:41,091][transformers.trainer][INFO] -
+
+ Training completed. Do not forget to share your model on huggingface.co/models =)
+
+
+ [2025-03-18 04:40:41,092][transformers.trainer][INFO] - Loading best model from /workspace/jbcs2025/outputs/2025-03-17/22-47-57/results/llama31_8b-balanced/C5/checkpoint-620 (score: 0.4536489151873767).
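The best-model step above resolves to checkpoint-620 because its validation QWK (0.4537) was the highest logged. A sketch of the standard `transformers` arguments behind this selection; the exact values used in this run are not recorded here, so treat them as assumptions:

```python
# Sketch of best-checkpoint selection; "QWK" resolves to the logged
# eval_QWK key. Assumed, not read from this run's config.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="results/llama31_8b-balanced/C5",
    load_best_model_at_end=True,  # emits "Loading best model from ..." above
    metric_for_best_model="QWK",
    greater_is_better=True,       # higher QWK is better
)
```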
+ [2025-03-18 04:40:47,380][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 04:40:47,383][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 04:40:47,383][transformers.trainer][INFO] - Num examples = 132
+ [2025-03-18 04:40:47,383][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 04:42:02,861][transformers][INFO] - {'accuracy': 0.3409090909090909, 'RMSE': 57.94459213400123, 'QWK': 0.4536489151873767, 'HDIV': 0.08333333333333337, 'Macro_F1': 0.28543976348854394, 'Micro_F1': 0.3409090909090909, 'Weighted_F1': 0.3336524894174562}
+ [2025-03-18 04:42:02,865][__main__][INFO] - Training completed successfully.
+ [2025-03-18 04:42:02,865][__main__][INFO] - Running on Test
+ [2025-03-18 04:42:02,865][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt. If supporting_text, essay_text, id_prompt, id, reference, grades, essay_year, prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
+ [2025-03-18 04:42:02,868][transformers.trainer][INFO] -
+ ***** Running Evaluation *****
+ [2025-03-18 04:42:02,868][transformers.trainer][INFO] - Num examples = 138
+ [2025-03-18 04:42:02,868][transformers.trainer][INFO] - Batch size = 2
+ [2025-03-18 04:43:25,959][transformers][INFO] - {'accuracy': 0.2898550724637681, 'RMSE': 62.60064360459999, 'QWK': 0.400524367674275, 'HDIV': 0.10144927536231885, 'Macro_F1': 0.2873977873977874, 'Micro_F1': 0.2898550724637681, 'Weighted_F1': 0.27963119810945897}
+ [2025-03-18 04:43:25,962][transformers.trainer][INFO] - Saving model checkpoint to ./results/llama31_8b-balanced/C5/best_model
+ [2025-03-18 04:43:26,364][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b/config.json
+ [2025-03-18 04:43:27,995][__main__][INFO] - Fine Tuning Finished.
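With fine-tuning finished and the adapter pushed, scoring only requires the base model plus the LoRA weights. A sketch of inference loading; the Hub repo id is hypothetical, and `num_labels=6` assumes the six ENEM grade levels (0, 40, ..., 200):

```python
# Sketch: load the pushed adapter for essay scoring. Repo id and label
# count are assumptions, not taken from this log.
import torch
from peft import PeftModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer

base = AutoModelForSequenceClassification.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    num_labels=6,                # assumed: grades 0, 40, 80, 120, 160, 200
    torch_dtype=torch.bfloat16,  # matches the config's torch_dtype
)
model = PeftModel.from_pretrained(base, "user/llama31_8b-balanced-C5")  # hypothetical id
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")

inputs = tokenizer("Texto do ensaio...", return_tensors="pt", truncation=True)
label = model(**inputs).logits.argmax(dim=-1).item()
```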
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7465e1d2310c126276d3dbe9799d71decbf73e10734d3057de043efe139827d
+ size 5432