GeoLLM / test.py
Ciallo0d00's picture
Upload folder using huggingface_hub
badcf3c verified
import json
def simple_convert(input_path, output_path):
    """Convert dict-style triples in a JSON file into list-style triples.

    The input file is expected to hold a JSON array of objects. Each object
    must carry a ``"text"`` key and may carry a ``"triple_list"`` whose
    entries are dicts with the keys ``entity1``, ``relation`` and
    ``entity2``. Every well-formed triple is rewritten as
    ``[entity1, relation, entity2]``; anything else is reported on stdout
    and skipped. All other keys of an item are dropped.

    Args:
        input_path: Path of the JSON file to read (UTF-8).
        output_path: Path of the JSON file to write (UTF-8, indent of 2,
            non-ASCII characters written as-is).

    Raises:
        KeyError: If an item has no ``"text"`` key.
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Built once: the key order here is also the order of the output list.
    required_keys = ('entity1', 'relation', 'entity2')

    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    converted_data = []
    for item in data:
        new_item = {"text": item["text"]}
        triple_list = []

        # A missing or null "triple_list" is treated as "no triples" so one
        # sparse record does not abort the whole conversion.
        for triple in item.get("triple_list") or []:
            # `key in triple` is only a key test for dicts: on a string it
            # is a substring match and on None/numbers it raises, so the
            # type has to be checked before the keys are.
            is_complete = isinstance(triple, dict) and all(
                k in triple for k in required_keys
            )
            if not is_complete:
                print(f"跳过不完整的三元组:{triple}")
                continue

            triple_list.append([triple[k] for k in required_keys])

        new_item["triple_list"] = triple_list
        converted_data.append(new_item)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(converted_data, f, ensure_ascii=False, indent=2)

    print(f"转换完成!有效转换{len(converted_data)}条数据,保留原始三元组结构")
# Usage example: convert one model-output file when run as a script.
if __name__ == "__main__":
    simple_convert(
        input_path=r'F:\GeoLLM\output\three_shot\gemini-1.5-pro-0021.json',
        output_path=r'F:\GeoLLM\output\three_shot\gemini-1.5-pro-002.json',
    )