import json
import os

from utils.LLM import LLM_request
'''
Use GPT-4o-mini-2024-07-18 as reasoning model to extract core phrases/entities
from CoT responses, replace original answers and save as new files
'''
# 读取JSON文件
model_results_paths = [
'./output/Task2/cot/cot/deepseek-ai/DeepSeek-R1_f.json',
'....'
]
model_series = 'gpt'
model_name = 'gpt-4o-mini-2024-07-18'
prompt = '''
Extract the main factual information from the following sentence that answers the question.
The answer should be entity phrases without additional explanations or prefix statements.
Question: {question}
Answer: {answer}
Please extract only the core answer:
'''
# Use GPT-4o-mini-2024-07-18 as reasoning model to extract core phrases/entities
# from CoT responses, replace original answers and save as new files
for i in range(len(model_results_paths)):
with open(model_results_paths[i], 'r', encoding='utf-8') as f:
data = json.load(f)
file_name = model_results_paths[i].split('/')[-1].split('_')[0]
print(file_name)
new_data = []
for j in range(len(data)):
question = data[j]['question']
answer = data[j]['answer']
prompt = f"Extract the main factual information from the following sentence that answers the question. The answer should be entity phrases without additional explanations or prefix statements.\nQuestion: {question}\nAnswer: {answer}\nPlease extract only the core answer:"
# print(prompt)
response = LLM_request(model_series, model_name, prompt + '\n' + 'Do not include any other irrelevant explanations or meaningless responses')
# print(response)
core_answer = response.content if hasattr(response, 'content') else response
# Add processed data to new list
new_data.append({
"question": question,
"answer": core_answer
})
# Save new data to a new JSON file
new_file_path = './output/Task2/cot/cot_new/'+file_name+'_f_processed.json'
with open(new_file_path, 'w', encoding='utf-8') as f:
json.dump(new_data, f, ensure_ascii=False, indent=4)