Commit e53d2e0 (verified) by qgallouedec (HF Staff)
Parent: b52ad44

Training in progress, step 500

README.md CHANGED

````diff
@@ -1,18 +1,18 @@
 ---
-base_model: Qwen/Qwen2-0.5B
-datasets: trl-lib/prm800k
+base_model: Qwen/Qwen2-0.5B-Instruct
 library_name: transformers
 model_name: Qwen2-0.5B-Reward
 tags:
 - generated_from_trainer
+- prm
 - trl
-- stepwise-reward-trainer
+- hf_jobs
 licence: license
 ---
 
 # Model Card for Qwen2-0.5B-Reward
 
-This model is a fine-tuned version of [Qwen/Qwen2-0.5B](https://huggingface.co/Qwen/Qwen2-0.5B) on the [trl-lib/prm800k](https://huggingface.co/datasets/trl-lib/prm800k) dataset.
+This model is a fine-tuned version of [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -28,25 +28,26 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/zvj1dih4)
+
 
-This model was trained with Stepwise Reward.
+
+This model was trained with PRM.
 
 ### Framework versions
 
-- TRL: 0.13.0.dev0
-- Transformers: 4.47.0.dev0
-- Pytorch: 2.5.0
-- Datasets: 3.1.0
-- Tokenizers: 0.20.3
+- TRL: 0.24.0.dev0
+- Transformers: 4.56.1
+- Pytorch: 2.8.0
+- Datasets: 4.0.0
+- Tokenizers: 0.22.0
 
 ## Citations
 
-Cite Stepwise Reward as:
+Cite PRM as:
 
 ```bibtex
 @article{uesato2022solving,
-    title = {Solving Math Word Problems With Process- and Outcome-Based Feedback},
+    title = {{Solving Math Word Problems With Process- and Outcome-Based Feedback}},
     author = {Uesato, Jonathan and Kushman, Nate and Kumar, Ramana and Song, Francis and Siegel, Noah and Wang, Lisa and Creswell, Antonia and Irving, Geoffrey and Higgins, Irina},
     year = 2022,
     journal = {arXiv preprint arXiv:2211.14275}
@@ -58,7 +59,7 @@ Cite TRL as:
 ```bibtex
 @misc{vonwerra2022trl,
     title = {{TRL: Transformer Reinforcement Learning}},
-    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
     year = 2020,
     journal = {GitHub repository},
     publisher = {GitHub},
````
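The updated card says the model was trained with PRM, i.e. TRL's process reward modeling trainer. For orientation only, a minimal training sketch under that assumption; the dataset and hyperparameters below are illustrative, not read from this commit:

```python
from datasets import load_dataset
from transformers import AutoModelForTokenClassification, AutoTokenizer
from trl import PRMConfig, PRMTrainer

model_id = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# A process reward model scores each reasoning step, hence the token-classification
# head (matching the Qwen2ForTokenClassification architecture in this repo's config.json).
model = AutoModelForTokenClassification.from_pretrained(model_id, num_labels=2)

# Illustrative stepwise dataset with "prompt", "completions", and "labels" columns;
# the dataset actually used for this checkpoint is not recorded in the updated card.
train_dataset = load_dataset("trl-lib/math_shepherd", split="train")

trainer = PRMTrainer(
    model=model,
    args=PRMConfig(output_dir="Qwen2-0.5B-Reward"),
    train_dataset=train_dataset,
    processing_class=tokenizer,
)
trainer.train()
```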
chat_template.jinja ADDED

```diff
@@ -0,0 +1,6 @@
+{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
+You are a helpful assistant.<|im_end|>
+' }}{% endif %}{{'<|im_start|>' + message['role'] + '
+' + message['content'] + '<|im_end|>' + '
+'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
+' }}{% endif %}
```
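The template implements the standard ChatML format with a default system prompt. Since `transformers` picks up a standalone `chat_template.jinja` automatically, the rendering can be checked with the stock tokenizer API (the repo path below is a placeholder):

```python
from transformers import AutoTokenizer

# Placeholder path; use this repo's id or a local checkout.
tokenizer = AutoTokenizer.from_pretrained("qgallouedec/Qwen2-0.5B-Reward")

messages = [{"role": "user", "content": "What is 2 + 2?"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# What is 2 + 2?<|im_end|>
# <|im_start|>assistant
```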
config.json CHANGED

```diff
@@ -1,16 +1,41 @@
 {
-  "_name_or_path": "Qwen/Qwen2-0.5B",
   "architectures": [
     "Qwen2ForTokenClassification"
   ],
   "attention_dropout": 0.0,
-  "bos_token_id": 151643,
-  "eos_token_id": 151643,
+  "dtype": "float32",
+  "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 896,
   "initializer_range": 0.02,
   "intermediate_size": 4864,
-  "max_position_embeddings": 131072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
   "max_window_layers": 24,
   "model_type": "qwen2",
   "num_attention_heads": 14,
@@ -22,8 +47,7 @@
   "rope_theta": 1000000.0,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.47.0.dev0",
+  "transformers_version": "4.56.1",
   "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 151936
```
model.safetensors CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b613f904affd73904f07247944d0372d2b2a5d614b24cad27465fef9dc5e499f
+oid sha256:19fb7634844ba4e2157a323aeb507a10f2a9bc977f797dda04130811ba4db2da
 size 1976170816
```
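`model.safetensors` is stored through Git LFS, so the diff only swaps the pointer's object hash; the payload size is unchanged. A downloaded file can be verified against the new pointer like this:

```python
import hashlib

# Stream the ~1.98 GB file in 1 MiB chunks to avoid loading it all into memory.
h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

expected = "19fb7634844ba4e2157a323aeb507a10f2a9bc977f797dda04130811ba4db2da"
print(h.hexdigest() == expected)
```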
special_tokens_map.json CHANGED

```diff
@@ -4,7 +4,7 @@
     "<|im_end|>"
   ],
   "eos_token": {
-    "content": "<|endoftext|>",
+    "content": "<|im_end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
```
tokenizer_config.json CHANGED

```diff
@@ -31,9 +31,8 @@
     "<|im_end|>"
   ],
   "bos_token": null,
-  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
+  "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
   "model_max_length": 32768,
```
training_args.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c29057260b37ba464a5874dd5ab5403968e08f2f16311a22fc1d82f3356a84ca
-size 5496
+oid sha256:aebad9bdc8da61ae130c99fd20ae762b915956475209baf2ed6c414c18f14e04
+size 6033
```
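`training_args.bin` is a pickled trainer config rather than a tensor file, which is why its size changed with the TRL upgrade. The exact hyperparameters behind this checkpoint can be inspected locally; note that `weights_only=False` executes pickle code, so only do this for repos you trust:

```python
import torch

# training_args.bin holds a pickled TrainingArguments subclass (e.g. a TRL config).
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print(args.learning_rate, args.num_train_epochs)
```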