zswzswzsw
/

verl_subquestion

Model card Files Files and versions

verl_subquestion / make_grpo_dataset.py

zswzswzsw's picture

Upload folder using huggingface_hub

66407c5 verified about 1 month ago

history blame contribute delete

2.23 kB


	import argparse
	import ast
	import os

	import datasets
	from datasets import load_dataset
	from verl.utils.hdfs_io import copy, makedirs
	from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed


	def extract_solution(solution_str):
	    """Extract the answer from ``solution_str`` via the verl math helpers.

	    The string is passed to ``last_boxed_only_string`` and that result is
	    passed on to ``remove_boxed``; whatever ``remove_boxed`` returns is
	    returned unchanged.

	    NOTE(review): presumably this yields the contents of the final boxed
	    answer in the solution text -- confirm against
	    ``verl.utils.reward_score.math``.
	    """
	    boxed_span = last_boxed_only_string(solution_str)
	    return remove_boxed(boxed_span)


	if __name__ == "__main__":
	    # Convert a local JSON dump of questions into the train/test parquet
	    # files written under --local_dir.
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--local_dir", default="./data/math")

	    args = parser.parse_args()

	    # The rows are read from the local JSON file below, not from the Hub.
	    # `data_source` is only stored as a label on every record. It is kept at
	    # the MATH-lighteval mirror name ('lighteval/MATH' is no longer available
	    # on huggingface).
	    # NOTE(review): presumably downstream code keys on this label -- confirm.
	    data_source = "DigitalLearningGmbH/MATH-lighteval"
	    data_file = "grpo_1.5B_without_sub_data.json"

	    # The first 100 rows of the file form the test split; the rest is train.
	    train_dataset = load_dataset("json", data_files=data_file, split="train[100:]")
	    test_dataset = load_dataset("json", data_files=data_file, split="train[:100]")
	    instruction_following = "Let's think step by step and output the final answer within \\boxed{}."

	    def make_map_fn(split):
	        """Return the per-row mapper for ``Dataset.map(..., with_indices=True)``.

	        Args:
	            split: Split name recorded in each row's ``extra_info``.

	        Returns:
	            A function ``process_fn(example, idx)`` that builds the output
	            record for one input row.
	        """

	        def process_fn(example, idx):
	            """Build one output record from a raw row and its index.

	            Reads the ``question``, ``gold`` and ``type`` fields of
	            ``example``. The ground truth is always emitted as a list.

	            Raises:
	                ValueError, SyntaxError: if a ``type == 'sub'`` row has a
	                    ``gold`` string that is not a valid Python literal.
	            """
	            question = example.pop("question")
	            question = question + " " + instruction_following

	            solution = example["gold"]
	            if example["type"] == "sub":
	                # 'sub' rows store `gold` as the string form of a Python
	                # literal. Parse it with ast.literal_eval rather than eval so
	                # that dataset content can never execute arbitrary code.
	                # NOTE(review): assumes `gold` only ever holds plain literals
	                # (e.g. a list of strings) -- confirm against the data file.
	                solution = ast.literal_eval(solution)
	            else:
	                solution = [solution]

	            return {
	                "data_source": data_source,
	                "prompt": [{"role": "user", "content": question}],
	                "ability": "math",
	                "reward_model": {"style": "rule", "ground_truth": solution},
	                # `index` is the row's position within its split.
	                "extra_info": {"split": split, "index": idx},
	            }

	        return process_fn

	    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
	    test_dataset = test_dataset.map(function=make_map_fn("test"), with_indices=True)

	    local_dir = args.local_dir
	    # Make sure the output directory exists before writing into it.
	    os.makedirs(local_dir, exist_ok=True)

	    train_dataset.to_parquet(os.path.join(local_dir, "grpo_mid_train.parquet"))
	    test_dataset.to_parquet(os.path.join(local_dir, "grpo_mid_test.parquet"))

	    # Preview up to five training rows; bounded by the dataset size so a
	    # small train split does not raise IndexError.
	    for i in range(min(5, len(train_dataset))):
	        print(train_dataset[i])
	    print(len(train_dataset))
	    print(len(test_dataset))