Spaces:

Ciallo0d00
/

GeoLLM

Runtime error

App Files Files Community

GeoLLM / Task2 /cot_process.py

Ciallo0d00

Upload folder using huggingface_hub

badcf3c verified 5 months ago

raw

history blame contribute delete

2.82 kB

	import json
	import os

	from LLM import zero_shot


	# JSON result files to read, one per model whose chain-of-thought answers
	# are to be processed. Every file sits under the same results folder, so
	# each entry below only names the part of the path after that folder.
	#
	# Inputs that are currently disabled (kept for reference):
	#   'deepseek-ai/DeepSeek-R1_f_001.json'
	#   'gpt-3.5-turbo_f.json'
	#   'gpt-4o_f.json'
	#   'gemini-1.5-pro-002_f.json'
	#   'claude-3-5-haiku-20241022_f.json'
	#   'deepseek-ai/DeepSeek-V3_f.json'
	#   'deepseek-ai/DeepSeek-R1_f.json'
	model_results_paths = [
	    'F:/GeoLLM/output/output_result/Task2/cot/cot/' + relative_path
	    for relative_path in (
	        'meta-llama/Meta-Llama-3.1-405B-Instruct_f.json',
	        'Qwen/Qwen2.5-72B-Instruct_f.json',
	    )
	]


	# Model used for the extraction step; both values are handed to zero_shot().
	model_series = 'gpt'
	model_name = 'gpt-4o-mini-2024-07-18'
	# Template (written in Chinese) asking the model to pull out only the main
	# factual information that answers the question, as an entity phrase with no
	# extra explanation or lead-in. {question} and {answer} are placeholders.
	# NOTE(review): the processing loop builds its request text itself instead of
	# calling .format() on this template -- confirm whether it is still needed.
	prompt = '''
	请从以下句子中提取出用于回答问题的主要事实性信息，答案应为实体短语，不要包含额外说明或前缀语句。
	问题：{question}
	回答：{answer}
	请仅提取出核心答案：
	'''
	# Post-process the chain-of-thought (CoT) result files: for every input file,
	# ask the extraction model (model_series / model_name) to reduce each answer
	# to its core phrase / entity, and save the question + core-answer pairs to a
	# new "<name>_f_processed.json" file. The input files are only read.
	output_dir = 'F:/GeoLLM/output/output_result/Task2/cot/cot_new/'
	# Create the output folder up front so that a missing directory cannot throw
	# away a whole file's worth of model calls when the results are written.
	os.makedirs(output_dir, exist_ok=True)

	for results_path in model_results_paths:
	    # Expects a JSON array of objects that each have "question" and "answer".
	    with open(results_path, 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    # Output name = input file name without ".json" and without the trailing
	    # "_f" marker. Only that suffix is removed (instead of cutting at the first
	    # underscore) so that e.g. "X_f_001.json" and "X_f.json" no longer map to
	    # the same output file and overwrite each other.
	    file_name = os.path.splitext(os.path.basename(results_path))[0]
	    if file_name.endswith('_f'):
	        file_name = file_name[:-len('_f')]
	    print(file_name)

	    # Processed records for this input file.
	    new_data = []

	    for record in data:
	        question = record['question']
	        answer = record['answer']
	        # A local name is used so the module-level `prompt` template is not
	        # overwritten on every iteration.
	        extraction_prompt = f"请从以下句子中提取出用于回答问题的主要事实性信息，答案应为实体短语，不要包含额外说明或前缀语句。\n问题：{question}\n回答：{answer}\n请仅提取出核心答案："
	        response = zero_shot(model_series, model_name, extraction_prompt + '\n' + '不要包含任何其他无关解释和无意义回复')
	        # zero_shot may hand back a message object exposing `.content`
	        # (presumably a ChatCompletionMessage) or the answer itself; use the
	        # `.content` value when it exists, otherwise the response as-is.
	        core_answer = response.content if hasattr(response, 'content') else response

	        # Keep the original question, but replace the answer with the
	        # extracted core answer.
	        new_data.append({
	            "question": question,
	            "answer": core_answer
	        })

	    # Save the processed records to a new JSON file.
	    new_file_path = output_dir + file_name + '_f_processed.json'
	    with open(new_file_path, 'w', encoding='utf-8') as f:
	        json.dump(new_data, f, ensure_ascii=False, indent=4)