# verl_subquestion/make_grpo_dataset.py
# Uploaded by zswzswzsw using huggingface_hub (commit 66407c5, verified).
import argparse
import ast
import os

import datasets
from datasets import load_dataset
from verl.utils.hdfs_io import copy, makedirs
from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
def extract_solution(solution_str):
    """Extract the final answer from a solution string.

    Feeds ``solution_str`` through ``last_boxed_only_string`` and hands the
    result to ``remove_boxed``, returning whatever the latter produces.
    Presumably this yields the contents of the last boxed expression in the
    solution -- confirm against the verl helpers.
    """
    boxed = last_boxed_only_string(solution_str)
    return remove_boxed(boxed)
def parse_ground_truth(gold, example_type):
    """Return the ground-truth value stored for one example.

    Args:
        gold: The raw ``gold`` field of the example.
        example_type: The example's ``type`` field.

    Returns:
        For ``"sub"`` examples, the Python literal parsed from ``gold``
        (presumably a list of sub-answers -- confirm against the data file).
        For every other type, ``gold`` wrapped in a one-element list.

    Raises:
        ValueError, SyntaxError: If a ``"sub"`` gold string is not a valid
            Python literal.
    """
    if example_type == "sub":
        # literal_eval only accepts Python literals, so a malformed or
        # malicious row cannot execute code the way eval() would.
        return ast.literal_eval(gold)
    return [gold]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", default="./data/math")
    args = parser.parse_args()

    # 'lighteval/MATH' is no longer available on huggingface; the mirror repo
    # is DigitalLearningGmbH/MATH-lighteval.
    # NOTE: this name is only written into each record's "data_source" field.
    # The rows themselves are read from the local JSON file below.
    data_source = "DigitalLearningGmbH/MATH-lighteval"
    data_file = "grpo_1.5B_without_sub_data.json"

    # The first 100 rows form the test split; everything after is train.
    train_dataset = load_dataset("json", data_files=data_file, split="train[100:]")
    test_dataset = load_dataset("json", data_files=data_file, split="train[:100]")

    instruction_following = "Let's think step by step and output the final answer within \\boxed{}."

    def make_map_fn(split):
        """Build the per-row mapper that tags records with ``split``."""

        def process_fn(example, idx):
            """Convert one raw row into the prompt / reward_model record."""
            # pop() removes "question" from the row in addition to reading it.
            question = example.pop("question")
            question = question + " " + instruction_following

            solution = parse_ground_truth(example["gold"], example["type"])

            data = {
                "data_source": data_source,
                "prompt": [{"role": "user", "content": question}],
                "ability": "math",
                "reward_model": {"style": "rule", "ground_truth": solution},
                # idx is the row's position within its own split.
                "extra_info": {"split": split, "index": idx},
            }
            return data

        return process_fn

    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
    test_dataset = test_dataset.map(function=make_map_fn("test"), with_indices=True)

    local_dir = args.local_dir
    # Make sure the target directory exists before writing the parquet files.
    os.makedirs(local_dir, exist_ok=True)

    train_dataset.to_parquet(os.path.join(local_dir, "grpo_mid_train.parquet"))
    test_dataset.to_parquet(os.path.join(local_dir, "grpo_mid_test.parquet"))

    # Preview at most five rows; a short train split must not raise IndexError.
    for i in range(min(5, len(train_dataset))):
        print(train_dataset[i])
    print(len(train_dataset))
    print(len(test_dataset))