'''
Use GPT-4o-mini-2024-07-18 as the reasoning model to extract core phrases/entities
from CoT responses, replace the original answers, and save the results as new files.
'''
import json
import os

from utils.LLM import LLM_request

# JSON result files to read
model_results_paths = [
    './output/Task2/cot/cot/deepseek-ai/DeepSeek-R1_f.json',
    '....',  # remaining result files
]
model_series = 'gpt'
model_name = 'gpt-4o-mini-2024-07-18'

prompt_template = '''
Extract the main factual information from the following sentence that answers the question. The answer should be entity phrases without additional explanations or prefix statements.
Question: {question}
Answer: {answer}
Please extract only the core answer:
'''

output_dir = './output/Task2/cot/cot_new/'
os.makedirs(output_dir, exist_ok=True)  # make sure the output directory exists

# Extract the core answer from each CoT response and save the processed records as new files
for path in model_results_paths:
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # e.g. '.../DeepSeek-R1_f.json' -> 'DeepSeek-R1'
    file_name = path.split('/')[-1].split('_')[0]
    print(file_name)

    new_data = []
    for item in data:
        question = item['question']
        answer = item['answer']
        prompt = prompt_template.strip().format(question=question, answer=answer)
        # print(prompt)
        response = LLM_request(
            model_series,
            model_name,
            prompt + '\n' + 'Do not include any other irrelevant explanations or meaningless responses'
        )
        # print(response)
        # LLM_request may return either a response object or a plain string
        core_answer = response.content if hasattr(response, 'content') else response

        # Keep the question, but replace the CoT answer with the extracted core answer
        new_data.append({
            "question": question,
            "answer": core_answer
        })

    # Save the processed data to a new JSON file
    new_file_path = output_dir + file_name + '_f_processed.json'
    with open(new_file_path, 'w', encoding='utf-8') as f:
        json.dump(new_data, f, ensure_ascii=False, indent=4)
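
# Illustrative sketch of a single record before and after processing. The values
# below are made up; only the 'question'/'answer' keys are assumed, based on the
# field access above, and any extra fields in the real files are dropped:
#
#   input:  {"question": "Who wrote Hamlet?",
#            "answer": "Let's think step by step ... so the play was written by William Shakespeare."}
#   output: {"question": "Who wrote Hamlet?",
#            "answer": "William Shakespeare"}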