import os
import json
import pandas as pd

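# NOTE: the commented-out section below is the disabled Task1 (triple
# extraction) variant of this script, kept for reference only.
# # Define file paths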
# folder_path = r".\人工评估\Task1"
# gt_file = "GT_500.json"
# llm_files = [
#     "gpt-3.5-turbo.json",
#     "gpt-4o.json",
#     "claude-3-5-haiku-20241022.json",
#     "gemini-1.5-pro-002.json",
#     "DeepSeek-R1.json",
#     "DeepSeek-V3.json",
#     "Meta-Llama-3.1-405B-Instruct.json",
#     "Qwen2.5-72B-Instruct.json"
# ]

# # Load the ground-truth (GT) file
# with open(os.path.join(folder_path, gt_file), "r", encoding="utf-8") as f:
#     gt_data = json.load(f)

# # Initialize an empty DataFrame
# columns = ["text", "对应ground truth三元组"] + [f"{os.path.splitext(file)[0]}提取的三元组" for file in llm_files]
# df = pd.DataFrame(columns=columns)

# # Fill in the GT data
# data_rows = []
# for item in gt_data:
#     text = item["text"]
#     gt_triples = "; ".join([f"{triple[0]}-{triple[1]}-{triple[2]}" for triple in item["triple_list"]])
#     data_rows.append({"text": text, "对应ground truth三元组": gt_triples})
# df = pd.concat([df, pd.DataFrame(data_rows)], ignore_index=True)

# # Load the LLM files and fill in their data
# for file in llm_files:
#     with open(os.path.join(folder_path, file), "r", encoding="utf-8") as f:
#         llm_data = json.load(f)
#     for i, item in enumerate(llm_data):
#         llm_triples = "; ".join([f"{triple[0]}-{triple[1]}-{triple[2]}" for triple in item["triple_list"]])
#         df.at[i, f"{os.path.splitext(file)[0]}提取的三元组"] = llm_triples

# # Save to an Excel file
# output_file = os.path.join(folder_path, "consolidated_results.xlsx")
# df.to_excel(output_file, index=False, engine="openpyxl")

# print(f"数据已成功整合到Excel文件：{output_file}")


# Input/output locations for the Task2 consolidation run.
# The folder is assembled with os.path.join so the script also works on
# POSIX systems; on Windows this yields the same ".\人工评估\Task2" string
# as the previously hard-coded literal. The path is relative to the current
# working directory.
folder_path = os.path.join(os.curdir, "人工评估", "Task2")

# Ground-truth workbook (read with pandas.read_excel further down).
gt_file = "data.xlsx"

# One JSON answer file per model. The file stem (name without ".json") is
# reused as the prefix of that model's column in the consolidated sheet.
llm_files = [
    "gpt-3.5-turbo_f.json",
    "gpt-4o_f.json",
    "claude-3-5-haiku-20241022_f.json",
    "gemini-1.5-pro-002_f.json",
    "DeepSeek-R1_f.json",
    "DeepSeek-V3_f.json",
    "Meta-Llama-3.1-405B-Instruct_f.json",
    "Qwen2.5-72B-Instruct_f.json",
]

# Load the ground-truth (GT) workbook. It must provide the "Text",
# "Question" and "Answer" columns selected below (KeyError otherwise).
gt_df = pd.read_excel(os.path.join(folder_path, gt_file))

# Output layout: the three GT columns followed by one answer column per LLM
# file, named "<file stem>回答的结果" (stem = file name without extension).
gt_columns = ["Text", "Question", "对应问题的标准答案"]
answer_columns = [f"{os.path.splitext(file)[0]}回答的结果" for file in llm_files]
columns = gt_columns + answer_columns

# Build the consolidated frame in one vectorised step instead of copying
# rows one at a time with iterrows() and concatenating onto an empty frame.
# reset_index gives the 0..n-1 row labels that positional filling relies on.
df = (
    gt_df[["Text", "Question", "Answer"]]
    .rename(columns={"Answer": "对应问题的标准答案"})
    .reset_index(drop=True)
)

# Add the still-empty per-model columns with object dtype so that answers of
# any Python type can later be assigned cell by cell without dtype coercion.
for answer_column in answer_columns:
    df[answer_column] = pd.Series(index=df.index, dtype=object)

# Merge every model's answers into the consolidated frame, one column per file.
for llm_file in llm_files:
    # Column receiving this model's answers: "<file stem>回答的结果".
    target_column = f"{os.path.splitext(llm_file)[0]}回答的结果"
    json_path = os.path.join(folder_path, llm_file)
    with open(json_path, "r", encoding="utf-8") as handle:
        records = json.load(handle)
    # Records are matched to rows purely by position: the record at position
    # k is written to the row labelled k. Each record must expose an
    # "answer" entry.
    for row_label, record in enumerate(records):
        df.at[row_label, target_column] = record["answer"]

# Write the consolidated table into the input folder as an .xlsx workbook
# (openpyxl engine, row index omitted).
output_file = os.path.join(folder_path, "consolidated_results.xlsx")
with pd.ExcelWriter(output_file, engine="openpyxl") as workbook:
    df.to_excel(workbook, index=False)

# Tell the user where the workbook was written.
print(f"数据已成功整合到Excel文件：{output_file}")