# ===============================
# Derma Space: Dataset + Gradio Chatbot
# ===============================
import json
import os
import random
from functools import lru_cache

import gradio as gr
from datasets import load_dataset
from huggingface_hub import HfApi, login, upload_file

# Single source of truth for the local dataset path; it is written by
# build_dataset() and read by the chat function.
DATASET_FILE = "derma_chat_mix.jsonl"

# Placeholder target repository on the Hub -- change to your own username.
DEFAULT_REPO_ID = "username/Derma"

# Reply used when no dataset entry shares a word with the user's message.
FALLBACK_REPLY = "Sorry, I don't have a good answer for that. Try another question!"

# ------------------------
# 1) Login with the Space secret
# ------------------------
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None:
    raise ValueError("HF_TOKEN not found in Secrets. Please add it in Space settings.")
login(token=hf_token)


# ------------------------
# 2) Build the combined dataset
# ------------------------
def build_dataset(repo_id: str = DEFAULT_REPO_ID) -> None:
    """Build the mixed chat dataset, save it as JSONL, and upload it to the Hub.

    The dataset is a shuffled mix of two hard-coded general-conversation
    examples and the ``train`` split of the Mreeb dermatology QA dataset.
    Each record has the keys ``domain``, ``context`` and ``response``.

    Args:
        repo_id: Hub dataset repository (``"user/name"``) to upload to. It is
            created if it does not exist yet.
    """
    print("Creating a small general dataset...")
    general_examples = [
        {"domain": "general", "context": "Hello, how are you?", "response": "I'm good, thank you!"},
        {"domain": "general", "context": "What's your name?", "response": "I'm Derma ChatBot."},
    ]

    # ----- Dermatology QA (Mreeb)
    print("Loading Dermatology QA (Mreeb)...")
    derma = load_dataset("Mreeb/Dermatology-Question-Answer-Dataset-For-Fine-Tuning")['train']
    print("Columns in Mreeb dataset:", derma.column_names)

    # Mreeb uses 'prompt' instead of 'question'; rows missing either field
    # (or holding an empty one) are skipped.
    derma_examples = [
        {"domain": "dermatology", "context": item.get('prompt'), "response": item.get('response')}
        for item in derma
        if item.get('prompt') and item.get('response')
    ]

    all_examples = general_examples + derma_examples
    random.shuffle(all_examples)

    # Save to JSONL (one JSON object per line).
    output_file = DATASET_FILE
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in all_examples)
    # The file on disk changed, so drop any previously cached copy.
    _load_examples.cache_clear()
    print(f"✅ Dataset saved locally as {output_file} ({len(all_examples)} examples)")

    # Upload to the Hugging Face Hub.
    api = HfApi()
    api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
    upload_file(
        path_or_fileobj=output_file,
        path_in_repo=output_file,
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Initial upload of text-based chat dataset"
    )
    print(f"✅ Dataset uploaded: https://huggingface.co/datasets/{repo_id}")


# ------------------------
# 3) Simple chat with Gradio
# ------------------------
@lru_cache(maxsize=None)
def _load_examples() -> tuple:
    """Load the local JSONL dataset once and pre-tokenize every context.

    Returns:
        A tuple of ``(context_tokens, response)`` pairs in file order, where
        ``context_tokens`` is the frozenset of lowercased whitespace-separated
        words of the record's ``context``.

    Raises:
        FileNotFoundError: If the dataset file has not been built yet.
    """
    examples = []
    with open(DATASET_FILE, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines instead of failing in json.loads
            record = json.loads(line)
            tokens = frozenset(record['context'].lower().split())
            examples.append((tokens, record['response']))
    return tuple(examples)


def simple_chat(user_input):
    """Return the response whose context shares the most words with the input.

    Matching is a case-insensitive overlap of whitespace-separated words.
    On ties the earliest entry in the dataset file wins. When the input is
    empty or nothing overlaps, a fixed fallback message is returned.

    Args:
        user_input: The message typed by the user.

    Returns:
        The best-matching response string, or the fallback message.
    """
    if not user_input:
        return FALLBACK_REPLY

    query_tokens = set(user_input.lower().split())
    best_match = None
    max_overlap = 0
    for context_tokens, response in _load_examples():
        overlap = len(query_tokens & context_tokens)
        # Strictly greater keeps the first of several equally good matches.
        if overlap > max_overlap:
            max_overlap = overlap
            best_match = response

    return best_match if best_match else FALLBACK_REPLY


# ------------------------
# 4) Gradio setup
# ------------------------
iface = gr.Interface(
    fn=simple_chat,
    inputs=gr.Textbox(lines=2, placeholder="Ask about dermatology or chat casually..."),
    outputs=gr.Textbox(label="Derma ChatBot"),
    title="Derma ChatBot",
    description="A simple English chatbot combining general conversation + dermatology QA."
)

# ------------------------
# 5) Build the dataset (if needed) and launch the interface
# ------------------------
if __name__ == "__main__":
    # Only rebuild and re-upload when no local copy exists.
    if not os.path.exists(DATASET_FILE):
        build_dataset()
    iface.launch()