"""Gradio app that visualizes a chat-style JSON conversation and exports it.

The app loads a JSON list of messages (``role`` + ``content`` parts), renders
them as HTML, extracts inline base64 images, converts the conversation to a
ShareGPT-style structure, and offers both representations as ZIP downloads.

NOTE(review): this file was received with its layout collapsed and with the
markup inside several string literals stripped. The HTML snippets and the
image placeholder below are reconstructions around the surviving text —
confirm them against the intended styling and dataset format.
"""

import base64
import html
import json
import os
import re
import tempfile
import zipfile
from datetime import datetime
from io import BytesIO
from typing import Optional, Union

import gradio as gr
import markdown
from PIL import Image

# Example conversation file, created on start-up when it does not exist.
EXAMPLE_PATH = "test_message_gpt.json"

# Marker inserted into ShareGPT text wherever an image appeared.
# NOTE(review): the literal was empty in the source as received; "<image>" is
# assumed here — confirm against the consumer of the exported data.
IMAGE_PLACEHOLDER = "<image>"


def _content_items(content):
    """Return message content as a list of typed parts.

    A plain string is wrapped as a single text part so that callers can
    iterate over parts uniformly; anything else is returned unchanged.
    """
    if isinstance(content, str):
        return [{"type": "text", "text": content}]
    return content


def export_to_zip(images, conversations, format_type="original"):
    """
    Export images and conversation data to a ZIP file

    Args:
        images: List of extracted images (objects with a PIL-style ``save``)
        conversations: Conversation JSON data
        format_type: Format type, "original" or "sharegpt" (currently unused;
            kept for interface compatibility)

    Returns:
        Path to the generated ZIP file
    """
    # The archive must outlive this call (it is handed to the download
    # widget), so the directory is intentionally not removed here.
    temp_dir = tempfile.mkdtemp()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_filename = os.path.join(temp_dir, f"export_{timestamp}.zip")

    with zipfile.ZipFile(zip_filename, "w") as zipf:
        # Encode each image in memory and write it straight into the archive,
        # so no intermediate files are left behind if saving fails.
        for i, img in enumerate(images):
            buffer = BytesIO()
            img.save(buffer, format="PNG")
            zipf.writestr(f"images/image_{i}.png", buffer.getvalue())

        zipf.writestr(
            "conversations.json",
            json.dumps(conversations, ensure_ascii=False, indent=4),
        )

    return zip_filename


def base64_to_image(
    base64_str: str,
    remove_prefix: bool = True,
    convert_mode: Optional[str] = "RGB",
) -> Union[Image.Image, None]:
    """
    Convert a base64 encoded image string to a PIL Image object

    Args:
        base64_str: Base64 encoded image string (with or without data: prefix)
        remove_prefix: Whether to automatically remove the "data:image/..."
            prefix (default True)
        convert_mode: Convert to the specified mode (e.g., "RGB"/"RGBA",
            None means no conversion)

    Returns:
        PIL.Image.Image object, returns None if decoding fails
    """
    try:
        # 1. Drop everything up to and including the first comma
        if remove_prefix and "," in base64_str:
            base64_str = base64_str.split(",", 1)[1]

        # 2. Decode Base64
        image_data = base64.b64decode(base64_str)

        # 3. Convert to PIL Image
        image = Image.open(BytesIO(image_data))

        # 4. Optional mode conversion
        if convert_mode:
            image = image.convert(convert_mode)

        return image
    except Exception as e:
        # Deliberately best-effort: any failure yields None rather than an
        # exception, so one bad image does not abort the whole conversation.
        print(f"Base64 decoding failed: {str(e)}")
        return None


def process_message_to_sharegpt_format(message):
    """
    Convert messages to ShareGPT format

    The first message becomes a "human" turn. Every later message becomes a
    "gpt" turn; if it contains an image, the text before the first image
    placeholder is the "gpt" turn and the placeholder plus everything after
    it becomes an "observation" turn.

    Args:
        message: Original message data (list of dicts with "content")

    Returns:
        Dict with "conversations" (list of turns) and "images" (decoded images)
    """
    sharegpt_images = []
    sharegpt_conversation = []

    for i, message_item in enumerate(message):
        parts = []
        for content_item in _content_items(message_item["content"]):
            content_type = content_item["type"]
            if content_type == "text":
                parts.append(content_item["text"])
            elif content_type == "image_url":
                # The placeholder is added even when decoding fails, as in
                # the original flow; only decoded images are collected.
                parts.append(IMAGE_PLACEHOLDER)
                image = base64_to_image(content_item["image_url"]["url"])
                if image is not None:
                    sharegpt_images.append(image)
        whole_content = "".join(parts)

        if i == 0:
            sharegpt_conversation.append({"from": "human", "value": whole_content})
            continue

        if IMAGE_PLACEHOLDER in whole_content:
            # partition() splits at the first placeholder only, so turns with
            # several images no longer fail on tuple unpacking.
            gpt_content, _, observation_content = whole_content.partition(
                IMAGE_PLACEHOLDER
            )
            sharegpt_conversation.append({"from": "gpt", "value": gpt_content})
            sharegpt_conversation.append(
                {
                    "from": "observation",
                    "value": IMAGE_PLACEHOLDER + observation_content,
                }
            )
        else:
            sharegpt_conversation.append({"from": "gpt", "value": whole_content})

    return {
        "conversations": sharegpt_conversation,
        "images": sharegpt_images,
    }


def extract_images_from_messages(messages):
    """
    Extract all images from messages

    Only ``image_url`` parts whose URL starts with "data:" are decoded.

    Args:
        messages: Message JSON data

    Returns:
        Tuple of (extracted image list, the unchanged messages)
    """
    images = []

    for message in messages:
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            continue
        for content_item in content:
            if content_item.get("type") != "image_url":
                continue
            image_url = content_item.get("image_url", {}).get("url", "")
            if image_url.startswith("data:"):
                image = base64_to_image(image_url)
                if image is not None:
                    images.append(image)

    return images, messages


def _render_messages_html(messages):
    """Render the conversation as an HTML fragment with one box per message."""
    # Wrapper div for all content; text color forced to black.
    parts = ['<div style="color: black;">']

    for message_item in messages:
        role = message_item["role"]

        # Style based on role
        if role in ("user", "human"):
            label, background = "User", "#f0f0f0"
        elif role == "assistant":
            label, background = "Assistant", "#e6f7ff"
        else:
            # Role names come from the uploaded file, so escape them.
            label, background = html.escape(role.capitalize()), "#f9f9f9"

        parts.append(
            f'<div style="background-color: {background}; padding: 10px; '
            f'margin: 10px 0; border-radius: 10px;"><strong>{label}:</strong>'
        )

        for content_item in _content_items(message_item["content"]):
            content_type = content_item["type"]
            if content_type == "text":
                # Convert Markdown text to HTML
                html_text = markdown.markdown(
                    content_item["text"],
                    extensions=["fenced_code", "codehilite"],
                )
                parts.append(f"<div>{html_text}</div>")
            elif content_type == "image_url":
                # Both data: URLs and remote URLs are valid <img> sources.
                src = html.escape(content_item["image_url"]["url"], quote=True)
                parts.append(
                    f'<img src="{src}" style="max-width: 100%; margin: 10px 0;">'
                )

        parts.append("</div>")

    parts.append("</div>")  # Close outermost div
    return "".join(parts)


def process_message(file_path):
    """
    Load a conversation JSON file and prepare every derived output

    Args:
        file_path: Path to a JSON file containing a list of messages

    Returns:
        Tuple of (HTML string, image list, messages, ShareGPT data). On any
        failure the HTML is an error message and the rest is ([], None, None).
    """
    try:
        # Read JSON file
        with open(file_path, "r", encoding="utf-8") as f:
            messages = json.load(f)

        # Extract images
        images, messages = extract_images_from_messages(messages)

        # Convert to ShareGPT format
        sharegpt_data = process_message_to_sharegpt_format(messages)

        # Create HTML output
        html_output = _render_messages_html(messages)

        return html_output, images, messages, sharegpt_data
    except Exception as e:
        # UI boundary: report the problem in the page instead of raising.
        error_text = html.escape(str(e))
        return (
            f'<div style="color: red;">Error processing file: {error_text}</div>',
            [],
            None,
            None,
        )


def upload_and_process(file):
    """Process the uploaded file, or return a prompt when nothing was uploaded."""
    if file is None:
        return "Please upload a JSON file", [], None, None

    # Accept both file-like objects exposing ``name`` and plain path strings.
    return process_message(getattr(file, "name", file))


def use_example():
    """Process the bundled example conversation."""
    return process_message(EXAMPLE_PATH)


def handle_export_original(images, conversations):
    """Handle export request for original format

    Returns the ZIP path, or None when no conversation is loaded. A
    conversation without images is still exported (the archive then holds
    only ``conversations.json``), matching the ShareGPT export handler.
    """
    if conversations is None:
        return None

    return export_to_zip(images or [], conversations, "original")


def handle_export_sharegpt(sharegpt_data):
    """Handle export request for ShareGPT format

    Returns the ZIP path, or None when there is nothing to export.
    """
    if sharegpt_data is None:
        return None

    images = sharegpt_data.get("images", [])
    conversations = sharegpt_data.get("conversations", [])

    if not images and not conversations:
        return None

    return export_to_zip(images, conversations, "sharegpt")


# Ensure example file exists
def setup_example_file():
    """Create a small example conversation file if it does not exist yet.

    In a real deployment the original test_message_gpt.json should be placed
    in the working directory; this only provides a fallback.
    """
    if os.path.exists(EXAMPLE_PATH):
        return

    example_messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Hello, please introduce yourself.",
                }
            ],
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": "Hello! I am an AI assistant. I can help answer questions, provide information, and have conversations. I am designed to assist users with a variety of tasks, from simple Q&A to more complex discussions.\n\nI can handle text information and also understand and describe images. Although I have some limitations, I will do my best to provide useful, accurate, and helpful responses.\n\nHow can I help you today?",
                }
            ],
        },
    ]

    with open(EXAMPLE_PATH, "w", encoding="utf-8") as f:
        json.dump(example_messages, f, ensure_ascii=False, indent=2)


# Set up the example file
setup_example_file()

# Create Gradio interface
with gr.Blocks(
    title="ChatGPT Conversation Visualizer",
    css="div.prose * {color: black !important;}",
) as demo:
    gr.Markdown("# ChatGPT Conversation Visualization Tool")
    gr.Markdown("Upload a JSON file containing ChatGPT conversation records or use the example file to view visualization results.")

    with gr.Row():
        file_input = gr.File(label="Upload JSON File", file_types=[".json"])

    with gr.Row():
        col1, col2 = gr.Column(), gr.Column()
        with col1:
            visualize_button = gr.Button("Visualize Uploaded Conversation")
        with col2:
            example_button = gr.Button("Use Example File")

    with gr.Row():
        output = gr.HTML(label="Conversation Content")

    # Add export buttons
    with gr.Row():
        with gr.Column():
            export_original_btn = gr.Button("Export Original Format")
            download_original_file = gr.File(label="Download Original Format ZIP")
        with gr.Column():
            export_sharegpt_btn = gr.Button("Export ShareGPT Format")
            download_sharegpt_file = gr.File(label="Download ShareGPT Format ZIP")

    # State variables to store current results
    current_images = gr.State([])
    current_json = gr.State(None)
    current_sharegpt = gr.State(None)

    visualize_button.click(
        fn=upload_and_process,
        inputs=[file_input],
        outputs=[output, current_images, current_json, current_sharegpt],
    )

    example_button.click(
        fn=use_example,
        inputs=[],
        outputs=[output, current_images, current_json, current_sharegpt],
    )

    export_original_btn.click(
        fn=handle_export_original,
        inputs=[current_images, current_json],
        outputs=[download_original_file],
    )

    export_sharegpt_btn.click(
        fn=handle_export_sharegpt,
        inputs=[current_sharegpt],
        outputs=[download_sharegpt_file],
    )

# Launch Gradio app
demo.launch()