Update app.py
app.py CHANGED
@@ -5,11 +5,9 @@
 # This script has been updated to run as a Hugging Face Space.
 #
 # Key Upgrades from the original script:
-# 1. **Hugging Face Model Integration**: Uses the '
-# from the Hugging Face Hub for argument extraction.
-# 2. **
-# HUGGING_FACE_HUB_TOKEN using os.environ.get(), which is the standard
-# for Hugging Face Spaces.
+# 1. **Hugging Face Model Integration**: Uses the fast 'Qwen/Qwen2-0.5B-Instruct'
+# model from the Hugging Face Hub for argument extraction.
+# 2. **Simplified Setup**: This model does not require a Hugging Face token.
 # 3. **Standard Dependencies**: All dependencies are managed via a
 # `requirements.txt` file.
 #
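Point 3 above says all dependencies live in `requirements.txt`. For reference, a minimal file consistent with the imports visible in this diff might look like the sketch below; the package list and the need for accelerate (pulled in by device_map="auto") are assumptions, not contents of this commit:

# requirements.txt (illustrative sketch, not part of this commit)
transformers   # AutoTokenizer, AutoModelForCausalLM
torch          # torch.bfloat16, tensor ops
accelerate     # required by device_map="auto"
gradio         # gr.Blocks UI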
@@ -40,40 +38,19 @@ print("✅ Embedding model loaded.")
 
 # --- Configuration for Hugging Face Model-based Argument Extraction ---
 try:
-    HF_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN')
-    if HF_TOKEN is None:
-        raise ValueError("HUGGING_FACE_HUB_TOKEN secret not found.")
-
     print("⚙️ Loading Hugging Face model for argument extraction...")
-    # Using the
-    model_id = "
-
-    hf_tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
-
-    # --------------------------------------------------------------------------
-    # ✅ FIX: Manually set the chat template for the Gemma model.
-    # This is required because the specified model does not have a default
-    # template set in its tokenizer config on the Hugging Face Hub.
-    # --------------------------------------------------------------------------
-    gemma_template = (
-        "{% for message in messages %}"
-        "{{'<start_of_turn>' + message['role'] + '\n' + message['content'] + '<end_of_turn>\n'}}"
-        "{% endfor %}"
-        "{% if add_generation_prompt %}"
-        "{{ '<start_of_turn>model\n' }}"
-        "{% endif %}"
-    )
-    hf_tokenizer.chat_template = gemma_template
-    # --------------------------------------------------------------------------
+    # Using the fast Qwen2 0.5B Instruct model
+    model_id = "Qwen/Qwen2-0.5B-Instruct"
 
+    hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
     hf_model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        token=HF_TOKEN,
         torch_dtype=torch.bfloat16,  # Use bfloat16 for efficiency
         device_map="auto"  # Automatically use GPU if available
     )
     USE_HF_LLM = True
-
+    # The Qwen2 tokenizer has a built-in chat template, so we don't need to set it manually.
+    print(f"✅ Successfully loaded '{model_id}' model.")
 
 except Exception as e:
     USE_HF_LLM = False
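The main win in this hunk is deleting the hand-maintained Gemma Jinja template: Qwen2's tokenizer config on the Hub bundles its own chat template. A minimal sketch to confirm that, assuming transformers is installed and the Hub is reachable (the example messages are made up):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
print(tokenizer.chat_template is not None)  # True: a template ships with the tokenizer

chat = [
    {"role": "system", "content": "Extract JSON arguments."},
    {"role": "user", "content": "Book a table for two tomorrow."},
]
# Renders ChatML-style turns (<|im_start|>role ... <|im_end|>) with no manual setup
print(tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True))

This is also why the chat list in the next hunk can pass a real "system" turn: Qwen2's ChatML template supports it natively.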
@@ -289,27 +266,27 @@ def extract_arguments_hf(user_prompt: str, tool: Tool):
     Uses a local Hugging Face model to extract structured arguments.
     """
     system_prompt = f"""
-
-
+    You are an expert at extracting structured data from natural language.
+    Your task is to analyze the user's prompt and extract the arguments required to call the tool: '{tool.name}'.
 
-
-
+    You must adhere to the following JSON schema for the arguments:
+    {json.dumps(tool.args_schema, indent=2)}
 
-
-
-
-
-
+    - If a value is not present in the prompt for a non-required field, omit it from the JSON.
+    - If a required value is missing, return a JSON object with an "error" key explaining what is missing.
+    - Today's date is {datetime.now().strftime('%Y-%m-%d')}. If the user says "tomorrow", use {(datetime.now() + timedelta(days=1)).strftime('%Y-%m-%d')}.
+    - Respond ONLY with a valid JSON object. Do not include any other text, explanation, or markdown code blocks.
+    """
 
-    #
+    # Qwen2 instruction-following format
     chat = [
-
-        {"role": "user", "content":
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
     ]
 
-    prompt = hf_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-
     try:
+        # The tokenizer for Qwen2 has a built-in chat template.
+        prompt = hf_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
         inputs = hf_tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(hf_model.device)
 
         # Generate with the model
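The hunk stops right at "# Generate with the model". For orientation, a hedged sketch of one way the generate-and-parse step could continue from `inputs`; the decoding parameters and the parse_json_reply helper are illustrative assumptions, not code from this commit:

import json
import re

def parse_json_reply(text: str) -> dict:
    # Take the outermost {...} span so stray prose around the JSON doesn't break parsing
    match = re.search(r"\{.*\}", text, re.DOTALL)
    if match is None:
        return {"error": "model returned no JSON object"}
    try:
        return json.loads(match.group(0))
    except json.JSONDecodeError as exc:
        return {"error": f"invalid JSON from model: {exc}"}

# Greedy decoding keeps structured output stable; slice off the prompt tokens before decoding
outputs = hf_model.generate(inputs, max_new_tokens=256, do_sample=False)
reply = hf_tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
extracted_args = parse_json_reply(reply)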
@@ -338,7 +315,7 @@ def execute_tool(user_prompt: str):
     selected_tool, score, _ = find_best_tool(user_prompt)
 
     if USE_HF_LLM:
-        print(f"⚙️ Selected Tool: {selected_tool.name}. Extracting arguments with
+        print(f"⚙️ Selected Tool: {selected_tool.name}. Extracting arguments with Qwen2...")
         extracted_args = extract_arguments_hf(user_prompt, selected_tool)
     else:
         # Fallback if the model failed to load
@@ -445,7 +422,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🛠️ Tool World: Advanced Prototype (Hugging Face Version)")
     gr.Markdown(
         "Enter a natural language command. The system will select the best tool, "
-        "extract structured arguments with **
+        "extract structured arguments with **Qwen/Qwen2-0.5B-Instruct**, and execute it."
     )
 
     with gr.Row():