| | |
| | from __future__ import annotations |
| | import re |
| | import os |
| | from typing import Any, Dict, Optional |
| |
|
| | from re_call import ReCall |
| | from transformers import AutoTokenizer |
| |
|
| |
|
| | import re |
| | from typing import Optional, Any, Dict, Tuple, List |
| |
|
| | def _extract_answer_boxed(s: str) -> Optional[str]: |
| | """ |
| | Return the content of the *last* \\boxed{...} or \\fbox{...} in `s`, |
| | with proper matching of nested braces. Escaped braces (\\{, \\}) are ignored |
| | for counting. If no balanced block is found, returns None. |
| | """ |
| | def _iter_box_like_spans(text: str): |
| | |
| | openings: List[Tuple[str, int, int]] = [] |
| | for m in re.finditer(r'\\boxed\s*\{', text): |
| | openings.append(("boxed", m.start(), m.end())) |
| | for m in re.finditer(r'\\fbox\s*\{', text): |
| | openings.append(("fbox", m.start(), m.end())) |
| | openings.sort(key=lambda x: x[1]) |
| | |
| | for kind, start, open_end in openings: |
| | depth = 1 |
| | i = open_end |
| | n = len(text) |
| | while i < n: |
| | ch = text[i] |
| | |
| | if ch == '\\' and i + 1 < n: |
| | i += 2 |
| | continue |
| | if ch == '{': |
| | depth += 1 |
| | elif ch == '}': |
| | depth -= 1 |
| | if depth == 0: |
| | |
| | yield (kind, start, open_end, i) |
| | break |
| | i += 1 |
| |
|
| | last_content: Optional[str] = None |
| | for _, _start, open_end, close_idx in _iter_box_like_spans(s): |
| | last_content = s[open_end:close_idx] |
| |
|
| | return last_content.strip() if last_content is not None else None |
| |
|
| |
|
| | def _extract_answer_tagged(s: str) -> Optional[str]: |
| | answer_tag_re = re.compile(r"<answer>(.*?)</answer>", re.S) |
| | m = answer_tag_re.findall(s) |
| | return m[-1].strip() if m else None |
| |
|
| |
|
def _parse_answer_from_transcript(transcript: str) -> str:
    """
    Prefer balanced \\boxed{...}/\\fbox{...} content, then <answer>...</answer>,
    else fall back to the last 200 chars.

    Note: the fallback was documented but previously missing, so this function
    could return None despite its `-> str` annotation; it now always returns a
    string (empty only when `transcript` itself is empty/whitespace).
    """
    return (
        _extract_answer_boxed(transcript)
        or _extract_answer_tagged(transcript)
        # Documented fallback: no recognizable answer markup, return the tail.
        or transcript[-200:].strip()
    )
| |
|
| |
|
| | |
def answer_question_recall(
    question: str,
    *,
    model_url: Optional[str] = None,
    executor_url: Optional[str] = None,
    tokenizer_dir: str = "./tokenizer-info",
    temperature: float = 0.6,
    max_new_tokens: int = 40960,
    top_p: float = 0.95,
    search_env: str = "from search_api import search_urls, open_url, search_and_parse_query, query_url",
    func_schemas: Optional[List[Dict[str, Any]]] = None,
    deepseek_name: str = "deepseek-ai/DeepSeek-R1",
    old_prompt: Optional[str] = None,
    deepresearch_on: bool = True,
    summary_llm: str = "gpt-4.1-mini",
):
    """
    Run a single question through ReCall as a generator.

    Yields (tag, payload) tuples forwarded from `ReCall.run` — observed tags are
    "assistant_resp", "tool_results" and "end" (see the __main__ driver).
    Terminates by *returning* ("answer", (answer_str,)), which callers receive
    via StopIteration.value.

    Parameters
    ----------
    question : the user question to answer.
    model_url / executor_url : endpoints; when None they are read from the
        HF_MODEL_URL / HOST_SERPER_URL environment variables (KeyError if unset).
    tokenizer_dir : local directory for AutoTokenizer.from_pretrained.
    func_schemas : tool schemas passed to the agent; None selects the default
        search_urls/query_url pair. (Previously a mutable default argument,
        which was shared across calls — now built fresh per call.)
    deepseek_name : model name that selects the non-streaming DeepSeek path.
    old_prompt / deepresearch_on / summary_llm : forwarded to ReCall.run.
    """
    if func_schemas is None:
        # Build per call to avoid the shared-mutable-default pitfall.
        func_schemas = [
            {
                "name": "search_urls",
                "description": "Google search and return links to web-pages with a brief snippet given a text query",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string"}, "top_k": {"type": "integer", "default": 10}},
                    "required": ["query"],
                },
            },
            {
                "name": "query_url",
                "description": "Visit webpage and return evidence based retrival for the provided goal",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {"type": "string", "description": "The URL of the webpage to visit. Must be a single URL"},
                        "goal": {"type": "string", "description": "The specific information goal for visiting webpage"},
                    },
                    "required": ["url", "goal"],
                },
            },
        ]

    # Endpoint defaults come from the environment; a missing variable raises
    # KeyError deliberately (fail fast rather than run against a bogus URL).
    if executor_url is None:
        executor_url = os.environ["HOST_SERPER_URL"]
    if model_url is None:
        model_url = os.environ["HF_MODEL_URL"]

    tok = AutoTokenizer.from_pretrained(tokenizer_dir, trust_remote_code=True)

    agent = ReCall(executor_url=executor_url)

    # Transcript of the last assistant response; the final answer is parsed
    # from it after the agent loop finishes.
    last_out = ""

    if model_url == deepseek_name:
        # Non-streaming DeepSeek path: one blocking call, nothing is yielded.
        out = agent.run_deepseek(
            env=search_env,
            func_schemas=func_schemas,
            question=question,
            model_name=deepseek_name,
            temperature=temperature,
            max_tokens=max_new_tokens,
            top_p=top_p,
        )
        # tool_calls/chat are normalized but not used further on this path.
        transcript, tool_calls, chat = _normalize_out(out, expect_chat=False)
        last_out = transcript
    else:
        # Streaming path: forward every (tag, payload) event to our caller.
        agent_generator = agent.run(
            env=search_env,
            func_schemas=func_schemas,
            question=question,
            model_url=model_url,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            tokenizer=tok,
            top_p=top_p,
            old_prompt=old_prompt,
            deepresearch_on=deepresearch_on,
            summary_llm=summary_llm,
        )

        while True:
            try:
                tag, out = next(agent_generator)
                if tag == "assistant_resp":
                    # payload is (assistant_text, tool_calls); keep the text.
                    last_out = out[0]
                yield tag, out
                if tag == "end":
                    break
            except StopIteration as e:
                # The agent returned instead of yielding "end"; its return
                # value is expected to be (tag, (chat_str, ...)) — surface the
                # chat string as a synthetic "end" event.
                chat_str: str = e.value[1][0]
                yield "end", (chat_str,)
                break

    answer = _parse_answer_from_transcript(last_out)

    # Delivered to the caller through StopIteration.value (PEP 380 semantics).
    return "answer", (answer,)
| |
|
| |
|
| | def _normalize_out(out: Any, expect_chat: bool) -> tuple[str, Any, Any]: |
| | """ |
| | Normalize ReCall outputs to (transcript, tool_calls, chat) |
| | Handles: |
| | - (transcript, tool_calls, chat) |
| | - (transcript, tool_calls) |
| | - "transcript" |
| | - {"transcript": ..., "tool_calls": ..., "chat": ...} variants |
| | """ |
| | transcript, tool_calls, chat = "", None, None |
| |
|
| | if isinstance(out, tuple): |
| | if len(out) == 3: |
| | transcript, tool_calls, chat = out |
| | elif len(out) == 2: |
| | transcript, tool_calls = out |
| | elif len(out) == 1: |
| | transcript = out[0] |
| | else: |
| | transcript = str(out[-1]) |
| | elif isinstance(out, dict): |
| | transcript = out.get("transcript") or out.get("output") or out.get("response") or "" |
| | tool_calls = out.get("tool_calls") |
| | chat = out.get("chat") |
| | else: |
| | transcript = str(out) |
| |
|
| | |
| | if chat is None and expect_chat is False: |
| | chat = None |
| | return transcript, tool_calls, chat |
| |
|
| |
|
| | |
if __name__ == "__main__":
    # Demo driver: stream one hard-coded question through the agent and print
    # every intermediate event as it arrives.
    old_prompt = None

    answer_generator = answer_question_recall(
        "What is the most popular restraunt in kolkata?",
        old_prompt=old_prompt
    )

    # Chat transcript captured from the "end" event, echoed at the bottom.
    final_chat_str = ""

    # Drive the generator manually with next() so we can catch StopIteration:
    # answer_question_recall *returns* ("answer", (answer,)) rather than
    # yielding it, so the final answer arrives via StopIteration.value.
    while True:
        try:
            tag, out = next(answer_generator)
            if tag == "assistant_resp":
                # payload: (assistant_text, tool_calls)
                assistant_text, tool_calls = out
                print(f"ASSISTANT RESPONSE:\n{assistant_text}\n\n")
                print("TOOL CALLS:\n")
                for tool_call in tool_calls:
                    print(f"{tool_call}")
                print("\n")
            elif tag == "tool_results":
                # payload: (results,) — one result per tool call
                results = out[0]
                print("TOOL RESULTS:\n")
                for result in results:
                    print(f"{result}")
                print("\n")
            elif tag == "end":
                print(f"{'='*20}\nASSISTANT RESPONSE ENDED\n{'='*20}\n\n")
                final_chat_str = out[0]
            elif tag == "answer":
                # NOTE(review): the generator delivers "answer" via return
                # (StopIteration), not yield, so this branch looks unreachable
                # from next() — confirm before relying on it.
                answer = out[0]
                print(f"FINAL ANSWER:\n{answer}\n\n")
                break
        except StopIteration as e:
            # e.value is ("answer", (answer_str,)) — see answer_question_recall.
            print(f"FINAL ANSWER:\n{e.value[1][0]}\n\n")
            break

    print(f"{'='*20}\nEND\n{'='*20}\n\n\nFINAL CHAT STRING:\n{final_chat_str}\n\n")