import json
import os

from utils.LLM import LLM_request
'''
Use GPT-4o-mini-2024-07-18 as reasoning model to extract core phrases/entities
from CoT responses, replace original answers and save as new files
'''
# 读取JSON文件
model_results_paths = [
'./output/Task2/cot/cot/deepseek-ai/DeepSeek-R1_f.json',
'....'
]
model_series = 'gpt'
model_name = 'gpt-4o-mini-2024-07-18'
prompt = '''
Extract the main factual information from the following sentence that answers the question.
The answer should be entity phrases without additional explanations or prefix statements.
Question: {question}
Answer: {answer}
Please extract only the core answer:
'''
# Use GPT-4o-mini-2024-07-18 as reasoning model to extract core phrases/entities
# from CoT responses, replace original answers and save as new files
for i in range(len(model_results_paths)):
with open(model_results_paths[i], 'r', encoding='utf-8') as f:
data = json.load(f)
file_name = model_results_paths[i].split('/')[-1].split('_')[0]
print(file_name)
new_data = []
for j in range(len(data)):
question = data[j]['question']
answer = data[j]['answer']
prompt = f"Extract the main factual information from the following sentence that answers the question. The answer should be entity phrases without additional explanations or prefix statements.\nQuestion: {question}\nAnswer: {answer}\nPlease extract only the core answer:"
# print(prompt)
response = LLM_request(model_series, model_name, prompt + '\n' + 'Do not include any other irrelevant explanations or meaningless responses')
# print(response)
core_answer = response.content if hasattr(response, 'content') else response
# Add processed data to new list
new_data.append({
"question": question,
"answer": core_answer
})
# Save new data to a new JSON file
new_file_path = './output/Task2/cot/cot_new/'+file_name+'_f_processed.json'
with open(new_file_path, 'w', encoding='utf-8') as f:
json.dump(new_data, f, ensure_ascii=False, indent=4)