Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
def _extract_fenced_block(text):
    """Return the body of the first Markdown code fence found in ``text``.

    A fence opened with ```` ```json ```` is preferred; otherwise the first
    plain ```` ``` ```` fence is used. When ``text`` contains no fence at all
    it is returned unchanged. The result is not stripped.
    """
    marker = '```json' if '```json' in text else '```'
    _, opened, after = text.partition(marker)
    if not opened:
        return text

    # Drop the remainder of the opening-fence line when it is blank or a bare
    # language tag (e.g. "python"); keep it when the payload starts on the
    # same line as the fence.
    first_line, newline, remainder = after.partition('\n')
    tag = first_line.strip()
    if newline and (not tag or tag.isalnum()):
        after = remainder

    # Prefer a closing fence at the start of a line; fall back to any closing
    # fence. partition() returns the whole string when neither is present.
    closing = '\n```' if '\n```' in after else '```'
    return after.partition(closing)[0]


def parse_llm_response(response):
    """Parse an LLM response and convert it to the standard triple format.

    Args:
        response: A string, a dict with a ``'content'`` key, or any object
            exposing a ``content`` attribute.

    Returns:
        ``{"triple_list": [[entity1, relation, entity2], ...]}`` built from
        every JSON object in the parsed array that has all three keys
        ``"entity1"``, ``"relation"`` and ``"entity2"``. Array elements that
        are not objects, or that miss a key, are skipped; a top-level JSON
        value that is not an array yields an empty ``triple_list``.
        ``None`` is returned when the content is not a string or cannot be
        decoded as JSON (a diagnostic is printed in both cases).
    """
    # Normalise the supported response shapes to a single content value.
    if isinstance(response, str):
        content = response
    elif isinstance(response, dict) and 'content' in response:
        content = response['content']
    else:
        content = response.content

    # The string operations below would raise on None / list content.
    if not isinstance(content, str):
        print(f"响应内容不是字符串,无法解析: {type(content).__name__}")
        return None

    # Narrow to the fenced block first (if any), then to the outermost
    # bracket-delimited span, which is where the JSON array is expected.
    content = _extract_fenced_block(content)
    start = content.find('[')
    end = content.rfind(']')
    if start != -1 and end != -1:
        content = content[start:end + 1]
    content = content.strip()

    try:
        triples = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"JSON解析错误,原始内容: {content}")
        print(f"错误详情: {str(e)}")
        return None

    # Anything other than a JSON array carries no triples.
    if not isinstance(triples, list):
        triples = []

    required = ("entity1", "relation", "entity2")
    return {
        "triple_list": [
            [triple[key] for key in required]
            for triple in triples
            # Guard the key test: `in` on a str/list element would be a
            # substring/membership test, and on an int it would raise.
            if isinstance(triple, dict) and all(key in triple for key in required)
        ]
    }
def save_to_json(text, formatted_triples, model_series, output_dir='./output'):
    """Append one text/triple record to ``<output_dir>/<model_series>.json``.

    The file holds a JSON list of ``{"text": ..., "triple_list": ...}``
    records. It is created when missing; an existing file containing a single
    JSON object is upgraded to a one-element list before appending.

    Args:
        text: Value stored under the record's ``"text"`` key.
        formatted_triples: Mapping with a ``"triple_list"`` key; that value is
            stored under the record's ``"triple_list"`` key.
        model_series: Base name (without extension) of the output file.
        output_dir: Directory for the output file; created if needed.

    Raises:
        json.JSONDecodeError: If the existing file has non-empty, invalid JSON.
        TypeError: If the record cannot be serialized to JSON. The existing
            file is left untouched in that case.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = f'{output_dir}/{model_series}.json'

    # Load prior records; a missing or empty file simply means "none yet".
    try:
        with open(output_path, 'r', encoding='utf-8') as f:
            raw = f.read()
    except FileNotFoundError:
        raw = ''
    existing_data = json.loads(raw) if raw.strip() else []
    if isinstance(existing_data, dict):
        existing_data = [existing_data]

    existing_data.append({
        "text": text,
        "triple_list": formatted_triples["triple_list"],
    })

    # Serialize BEFORE opening for writing: mode 'w' truncates the file, so a
    # serialization error after that point would wipe out the saved records.
    serialized = json.dumps(existing_data, ensure_ascii=False, indent=4)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
# Persist raw model responses; modelled on save_to_json.
def save_raw_response(response, prompt, model_series, output_dir='./output/two_shot_raw'):
    """Append one prompt/raw-response record to ``<output_dir>/<model_series>.json``.

    The file holds a JSON list of ``{"prompt": ..., "response": ...}``
    records. It is created when missing; an existing file containing a single
    JSON object is upgraded to a one-element list before appending.

    Args:
        response: Raw model response. A string has every literal two-character
            sequence backslash + ``n`` replaced by a real newline; any other
            object is stored as ``str(response)``.
        prompt: Value stored under the record's ``"prompt"`` key.
        model_series: Base name (without extension) of the output file.
        output_dir: Directory for the output file; created if needed.

    Raises:
        json.JSONDecodeError: If the existing file has non-empty, invalid JSON.
        TypeError: If the record cannot be serialized to JSON. The existing
            file is left untouched in that case.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = f'{output_dir}/{model_series}.json'

    # Load prior records; a missing or empty file simply means "none yet".
    try:
        with open(output_path, 'r', encoding='utf-8') as f:
            raw = f.read()
    except FileNotFoundError:
        raw = ''
    existing_data = json.loads(raw) if raw.strip() else []
    if isinstance(existing_data, dict):
        existing_data = [existing_data]

    # Non-string responses (e.g. dicts) have no .replace(), so they are
    # stored via str() instead.
    if isinstance(response, str):
        response_text = response.replace('\\n', '\n')
    else:
        response_text = str(response)

    existing_data.append({
        "prompt": prompt,
        "response": response_text,
    })

    # Serialize BEFORE opening for writing: mode 'w' truncates the file, so a
    # serialization error after that point would wipe out the saved records.
    serialized = json.dumps(existing_data, ensure_ascii=False, indent=4)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(serialized)