from __future__ import annotations

from typing import Iterable, Literal, Optional
import os
import time
import traceback

from huggingface_hub import InferenceClient, login, logout as hf_logout

from llm.llm_login import login_huggingface, is_loggedin_huggingface  # ,is_login_huggingface
from utils.logger import get_logger

## Get logger instance
logger = get_logger(__name__)


class HFChatClient:
    """
    Provider-agnostic LLM client interface.
    Encapsulates `huggingface_hub.InferenceClient` setup and chat calls.

    Backends:
    - model: plain HF model id (e.g., "HuggingFaceH4/zephyr-7b-beta")
    - provider: provider-routed id (e.g., "openai/gpt-oss-120b:fireworks-ai")
    - endpoint: full inference endpoint URL (e.g., "http://localhost:1234").
    """

    def __init__(self,
                 #api_token: str,
                 #model_id: str = "gpt2",
                 provider: str = "huggingface",  ## "huggingface2", "openai"
                 model_id: str = "openai/gpt-oss-120b",  ## default model
                 hf_provider: str = "huggingface",
                 endpoint_url: Optional[str] = None,
                 #backend: Literal["model", "provider", "endpoint"] = [],
                 backend_choice: Optional[str] = None,  ## choices=["model-id", "provider", "endpoint"]
                 system_message: str = "",
                 max_tokens: int = 4096,
                 temperature: float = 0.0,
                 top_p: float = 0.1,
                 stream: bool = False,
                 api_token: Optional[str] = None
                 ) -> None:
        try:
            self.model_id = model_id
            self.provider = provider.lower()
            self.hf_provider = hf_provider.lower()
            self.endpoint_url = endpoint_url
            #self.backend = backend
            ##SMY: superseded mapping, kept for reference:
            '''
            self.backend: Literal["model", "provider", "endpoint"] = (
                "model" if backend_choice == "Hugging Face Model ID" else (
                    "provider" if backend_choice == "HF Provider Route" else "endpoint")
            )
            '''
            self.backend: Literal["model", "provider", "endpoint"] = (
                "model" if backend_choice == "model-id" else (
                    "provider" if backend_choice == "provider" else "endpoint")
            )  ## see Gradio backend_choice dropdown
            self.system_message = system_message
            self.max_tokens = max_tokens
            self.temperature = temperature
            self.top_p = top_p
            self.stream = stream
            self.token = api_token or None  ## an empty string is an invalid token; fall back to None
            #self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")  ## not preferred
            self.base_url = "https://router.huggingface.co/v1"  ## HF API proxy
        except Exception as exc:
            tb = traceback.format_exc()
            logger.exception("✗ client_init_failed", extra={"error": str(exc)})
            raise RuntimeError(f"✗ Failed to initialise client: {exc}\n{tb}") from exc
        ##SMY: to be deprecated: moved to llm.llm_login
        '''
        # Explicitly disable implicit token propagation for determinism;
        # we rely on explicit auth or an env var.
        os.environ["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = "1"

        # Privacy-first login: try interactive CLI first; fall back to provided/env token only if needed.
        try:
            login()
            time.sleep(15)  ##SMY: pause for login. Helpful: pool async opex
            logger.info("hf_login", extra={"mode": "cli"})
        except Exception as exc:
            # Respect common env var names; prefer explicit token arg when provided
            fallback_token = self.token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
            if fallback_token:
                try:
                    login(token=fallback_token)
                    self.token = fallback_token
                    logger.info("hf_login", extra={"mode": "token"})
                except Exception as exc_token:
                    logger.warning("hf_login_failed", extra={"error": str(exc_token)})
            else:
                logger.warning("hf_login_failed", extra={"error": str(exc)})
                # Silent fallback; client will still work if token is passed directly
        '''
        ## Attempt login if not already logged in.
        ## NB: the HF CLI login prompt would not display in a Process Worker.
        if not is_loggedin_huggingface():
            login_huggingface(self.token)
        else:
            logger.log(level=20, msg="You are logged in to HF Hub already")
        ##SMY: TODO: mapped with openai_client.py
        #self.islogged_in = is_loggedin_huggingface()

    def _normalise_history(self, history: Optional[list], system_message: str, latest_user_message: str) -> list[dict]:
        """
        Normalise chat history to a list of {"role": role, "content": content} dicts,
        prefixed by system_message (if set) and terminated by the latest user message.
        Supports both dict and tuple formats for history items.
        """
        messages: list[dict] = []
        if system_message:
            messages.append({"role": "system", "content": system_message})
        for item in history or []:
            if isinstance(item, dict) and "role" in item and "content" in item:
                if item["role"] in ("user", "assistant"):
                    messages.append({"role": item["role"], "content": item["content"]})
            elif isinstance(item, (list, tuple)) and len(item) == 2:
                usr, asst = item
                if usr:
                    messages.append({"role": "user", "content": usr})
                if asst:
                    messages.append({"role": "assistant", "content": asst})
        messages.append({"role": "user", "content": latest_user_message})
        return messages
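
    ## Shape example (illustrative): mixed tuple/dict history flattens uniformly.
    ##   self._normalise_history([("hi", "hey")], "Be terse.", "next")
    ##   -> [{"role": "system", "content": "Be terse."},
    ##       {"role": "user", "content": "hi"},
    ##       {"role": "assistant", "content": "hey"},
    ##       {"role": "user", "content": "next"}]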

    def _initialise_client(self,
                           backend: Literal["model", "provider", "endpoint"],
                           model_id: Optional[str] = None,
                           hf_provider: Optional[str] = None,
                           endpoint_url: Optional[str] = None,
                           token: Optional[str] = None) -> InferenceClient:
        try:
            match backend:
                case "endpoint" | "model":
                    logger.debug("_initialise_client: initialising with:", extra={"model": model_id})  ## debug
                    hf_client = InferenceClient(model=model_id or endpoint_url, token=token)
                    logger.log(20, "client: ", extra={"model": model_id})  ## debug
                case "provider":
                    logger.info("_initialise_client: initialising with:", extra={"provider": hf_provider})  ## debug
                    hf_client = InferenceClient(provider=hf_provider, model=model_id, token=token)
                    logger.log(20, "client: ", extra={"backend": backend})  ## debug
                case _:
                    raise ValueError("Invalid backend.")
            return hf_client
        except Exception as exc:
            logger.log(40, "_initialise_client: client_init_failed", extra={"error": str(exc)})  ## debug
            raise RuntimeError(f"_initialise_client: Failed to initialise client: {exc}") from exc

    ## wrap HF client for marker
    def chat_fn(
        self,
        message: str,
        history: Optional[list] = None,  ## avoid a mutable default argument
    ) -> Iterable[str]:
        """Yield the assistant response (incrementally when streaming) for `message`, given prior `history`."""
        ## set prompt: normalise history with the system message and the latest user message
        messages = self._normalise_history(history, self.system_message, message)
        #token = api_token or self.token
        #token = self.token  ## redundant
| logger.log(20,"chat: initialising client", extra={ | |
| "backend": self.backend, "model": self.model_id, "provider": self.hf_provider, "endpoint": self.endpoint_url, | |
| "stream": self.stream, "max_tokens": self.max_tokens, "temperature": self.temperature, "top_p": self.top_p, | |
| }) | |
        ## initialise client
        try:
            client = self._initialise_client(self.backend, self.model_id, self.hf_provider, self.endpoint_url, self.token)
            logger.log(20, "chat: client initialised")  ## debug
        except Exception as exc:
            ##logger.error
            logger.log(40, "chat: client_init_failed", extra={"error": str(exc)})
            raise RuntimeError(f"chat: Failed to initialise client: {exc}") from exc
| logger.log(20, "chat_start", extra={ | |
| "backend": self.backend, "model": self.model_id, "provider": self.hf_provider, "endpoint": self.endpoint_url, | |
| "stream": self.stream, "max_tokens": self.max_tokens, "temperature": self.temperature, "top_p": self.top_p, | |
| }) | |
| if self.stream: | |
| acc = "" | |
| for chunk in client.chat_completion( | |
| messages=messages, | |
| #model=client.model, ## moved back to client initialise | |
| max_tokens=self.max_tokens, | |
| stream=True, | |
| temperature=self.temperature, | |
| top_p=self.top_p, | |
| ): | |
| delta = getattr(chunk.choices[0].delta, "content", None) or "" | |
| if delta: | |
| acc += delta | |
| yield acc | |
| return | |
| result = client.chat_completion( | |
| messages=messages, | |
| #model=client.model, ## moved back to client initialised | |
| max_tokens=self.max_tokens, | |
| stream=False, | |
| temperature=self.temperature, | |
| top_p=self.top_p, | |
| ) | |
| yield result.choices[0].message.content | |
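
        ## Consumption sketch (illustrative): a caller such as a Gradio chat
        ## handler can iterate this generator directly, e.g.
        ##   for partial in self.chat_fn("hello", history=[]):
        ##       ...render partial...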

        '''
        ## future consideration (sketch): text generation instead of chat completion.
        ## Note: InferenceClient.text_generation takes the prompt positionally and
        ## returns the generated text directly (no `inputs=`/`parameters=` payload).
        response = client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        return response
        '''

    def logout(self) -> bool:
        """Logout from Hugging Face and clear in-process tokens.

        Returns True on success, False otherwise.
        """
        try:
            hf_logout()
        except Exception as exc:
            logger.error("hf_logout_failed", extra={"error": str(exc)})
            return False

        # Clear process environment tokens
        for key in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
            os.environ.pop(key, None)  # pop with a default; no membership check needed
        self.token = None
        logger.info("hf_logout_success")
        return True
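

## Hedged smoke-test sketch: the model id, prompt, and token source below are
## illustrative assumptions, not part of the deployed Space.
if __name__ == "__main__":
    demo_client = HFChatClient(
        backend_choice="model-id",                # plain HF model backend
        model_id="HuggingFaceH4/zephyr-7b-beta",  # example id from the class docstring
        system_message="You are a concise assistant.",
        stream=False,
        api_token=os.getenv("HF_TOKEN"),          # assumes a token in the environment
    )
    for reply in demo_client.chat_fn("Say hello in one sentence."):
        print(reply)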