# pip install smolagents python-chess stockfish pandas numpy requests markdownify

"""Agent definitions for solving GAIA benchmark tasks.

Provides two smolagents `CodeAgent` wrappers:

- `BasicAgent`   — Hugging Face Inference endpoint (Qwen3) with tool-based
                   file handling for every file type.
- `GeminiAgent`  — Gemini model via LiteLLM; optionally relies on Gemini's
                   native vision/audio capabilities instead of OCR /
                   transcription tools.
"""

# Generic agent
import os
from typing import Optional

import pandas as pd

# Genai imports
from google import genai
from google.genai import types

# Smolagents imports
from smolagents import (
    CodeAgent,
    InferenceClientModel,
    TransformersModel,
    LiteLLMModel,
    DuckDuckGoSearchTool,
    VisitWebpageTool,
    PythonInterpreterTool,
    FinalAnswerTool,
)

# Import your custom tools (to be used in app, not in local notebook)
from tools.gemini_native_tools import analyze_video, analyze_image, analyze_audio
from tools.download_file import download_file_from_url
from tools.files_to_text import image_to_text, pdf_to_text, text_file_to_string
from tools.audio_tools import youtube_to_text, transcribe_audio

# Tools shared by every agent flavour.
AGENT_TOOLS = [
    # Default Tools
    DuckDuckGoSearchTool(),   # Internet search
    VisitWebpageTool(),       # Retrieve webpage content
    PythonInterpreterTool(),  # Executes agent-generated Python code
    FinalAnswerTool(),        # Ends agent reasoning and returns final answer
    # Custom Tools
    download_file_from_url,   # file downloader
    text_file_to_string,      # .txt, .md, .json, etc.
    pdf_to_text,              # PyMuPDF-based safe PDF parser
    image_to_text,            # OCR for images
    youtube_to_text,          # Youtube audio to text
    transcribe_audio,         # Audio file to text
]

# Gemini-only tools (wrap Gemini's native multimodal understanding)
NATIVE_TOOLS = [analyze_video, analyze_image, analyze_audio]

# Imports the sandboxed Python interpreter may execute.
AUTHORIZED_IMPORTS = [
    'numpy', 're', 'pandas', 'json', 'datetime',
    'tempfile', 'requests', 'markdownify', 'chess.*',
]

# --- SYSTEM PROMPT TEMPLATE ---
# The {file_handling_instructions} placeholder is filled differently for
# tool-based agents vs. native-multimodal Gemini.
SYSTEM_PROMPT_TEMPLATE = """
You are an expert **General AI Assistant** and **Python Programmer** tasked with solving complex GAIA benchmark problems.

### 1. Reason-Act-Observe
Follow a **PLAN → ACT → OBSERVE** loop:
- **PLAN:** Break the task into 1–3 logical steps. Identify tools for each step.
- **ACT:** Write and run one self-contained Python block per step.
- **OBSERVE:** Examine outputs or errors before proceeding.

### 2. File Handling
{file_handling_instructions}

**Important rules:**
- Whenever you are given a file path (or url), you **must ABSOLUTELY store it in a variable first** (e.g. `filepath`) and pass that variable directly to the next tool. **NEVER** try to write the path yourself in the function.
- You must **not** mix methods across file types (e.g. do not use Whisper for CSVs or pandas for audio).

### 3. Data Analysis & Answer
- Inspect loaded datasets first (`.head()`, `.info()`, `.describe()`) before analysis.
- Write clean, idiomatic Python code. Before that, check if there is any pre-made tool that would work for the task.

### 4. Additional instructions for the following tasks provided by GAIA team
- You are a general AI assistant. I will ask you a question. Do not reveal your internal reasoning. Only the content inside FinalAnswerTool will be evaluated.
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
- Do NOT include "FINAL ANSWER:" in your final answer text. For example: if the question is "What is the capital of Spain?", respond with "Madrid". It is exact and expected answer.

### 5. To provide the final answer, you MUST call the final_answer tool inside a block.
- Example of how to end the task:
Question: "What is the capital of France?"
Thought: I have found the answer. I will now provide it.

final_answer("Paris")
"""

# Instruction for Tool-Based Agents (BasicAgent and Gemini-Standard)
TOOL_BASED_INSTRUCTIONS = """
You must select the reading or transcription method **strictly** based on the file type:

| File Type / Source | Tool / Method to Use |
| :--- | :--- |
| `.csv` | `pd.read_csv(filepath)` |
| `.xlsx`, `.xls` | `pd.read_excel(filepath)` |
| `.pdf` | `pdf_to_text(filepath)` |
| `.txt`, `.md`, `.json` | `text_file_to_string(filepath)` |
| `.png`, `.jpg`, `.jpeg` | `image_to_text(filepath)` |
| **YouTube URL** | `youtube_to_text(url)` |
| `.mp3`, `.wav`, `.m4a` | `transcribe_audio(filepath)` |
"""

# Instruction for Native Gemini (No OCR/Transcribe tools for media)
NATIVE_MEDIA_INSTRUCTIONS = """
You have **native vision and audio capabilities**.
- For **Images (.png, .jpg) and Audio/Video**: Do NOT use external tools like `image_to_text`. You can see and hear these files directly. Analyze them using your internal multimodal capabilities.
- For **Data/Text files**: Continue using tools like `pd.read_csv(filepath)` or `text_file_to_string(filepath)`.
"""


class BasicAgent:
    """Smolagents CodeAgent backed by a Hugging Face Inference endpoint.

    Uses the tool-based file-handling instructions (OCR, transcription, etc.)
    since the underlying model has no native multimodal input.
    """

    def __init__(self):
        self.system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
            file_handling_instructions=TOOL_BASED_INSTRUCTIONS
        )
        self.model = InferenceClientModel(
            model_id="Qwen/Qwen3-Next-80B-A3B-Thinking",
            temperature=0.0,
            top_p=1.0,
            max_tokens=8196,  # NOTE(review): likely intended 8192 — confirm
        )
        self.basic_agent = CodeAgent(
            name="basic_agent",
            description="Basic smolagents CodeAgent",
            model=self.model,
            tools=AGENT_TOOLS,
            add_base_tools=True,  # probably redundant, but it does not hurt
            max_steps=5,
            additional_authorized_imports=AUTHORIZED_IMPORTS,
            verbosity_level=1,
            max_print_outputs_length=1_000_000,
        )
        print("✅ Basic agent initialized")

    def __call__(self, question: str, file_path: Optional[str] = None) -> str:
        """Run the agent on *question*, optionally pointing it at a local file.

        Returns the agent's final answer string.
        """
        prompt = f"{self.system_prompt}\n\nQuestion: {question}"
        if file_path:
            prompt += f"\nFile path: {file_path}"
        return self.basic_agent.run(prompt)


class GeminiAgent:
    """Smolagents CodeAgent driven by a Gemini model through LiteLLM.

    When ``native_multimodal`` is True, the prompt tells the model to use its
    built-in vision/audio understanding and the OCR/transcription tools are
    removed from the toolbox in favour of the Gemini-native analyze_* tools.
    """

    # def __init__(self, native_multimodal: bool = True, model_id: str = "gemini/gemini-3-flash-preview"):
    def __init__(self, native_multimodal: bool = True, model_id: str = "gemini/gemini-2.5-flash-lite"):
        self.native_multimodal = native_multimodal
        self.model_id = model_id

        # Fail fast on missing credentials. (Previously the genai client was
        # built *before* this check, and the env var was read twice.)
        GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
        if not GOOGLE_API_KEY:
            raise RuntimeError(
                "GOOGLE_API_KEY not found."
            )

        if self.native_multimodal:
            # Kept on the instance instead of being a discarded local.
            # NOTE(review): nothing in this file uses it directly — presumably
            # the native analyze_* tools build their own client; confirm.
            self.client = genai.Client(api_key=GOOGLE_API_KEY)

        # Switch prompt based on the native_multimodal flag
        INSTRUCTIONS = NATIVE_MEDIA_INSTRUCTIONS if native_multimodal else TOOL_BASED_INSTRUCTIONS
        self.system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
            file_handling_instructions=INSTRUCTIONS
        )

        self.model = LiteLLMModel(
            model_id=model_id,
            api_key=GOOGLE_API_KEY,
            temperature=0.0,
            top_p=1.0,
            max_tokens=8196,  # NOTE(review): likely intended 8192 — confirm
            timeout=120,      # Add timeout to prevent hanging
        )

        # If native, remove the OCR/transcription tools so the agent is not
        # tempted to use them instead of its built-in multimodal abilities.
        if self.native_multimodal:
            self.tools = NATIVE_TOOLS + [
                t for t in AGENT_TOOLS
                if t not in (image_to_text, youtube_to_text, transcribe_audio)
            ]
        else:
            self.tools = AGENT_TOOLS

        self.gemini_agent = CodeAgent(
            name="gemini_agent",
            description=f"Gemini CodeAgent ({model_id})",
            model=self.model,
            tools=self.tools,
            add_base_tools=True,  # probably redundant, but it does not hurt
            # NOTE(review): 2 steps is very tight for the PLAN→ACT→OBSERVE
            # loop the system prompt mandates — confirm this is intentional.
            max_steps=2,
            additional_authorized_imports=AUTHORIZED_IMPORTS,
            verbosity_level=1,
            max_print_outputs_length=1_000_000,
        )
        print(f"✅ Gemini agent initialized with model: {model_id}")

    def __call__(self, question: str, file_path: Optional[str] = None) -> str:
        """Run the agent on *question*, optionally pointing it at a local file.

        Returns the agent's final answer string.
        """
        prompt = f"{self.system_prompt}\n\nQuestion: {question}"
        if file_path:
            prompt += f"\n\nThere is a file at: {file_path}. Use your tools to process it."
        return self.gemini_agent.run(prompt)