# utils/config.py
"""
Centralised configuration constants.

##SMY: TODO: Create Class Settings(BaseSettings) leveraging `from pydantic_settings import BaseSettings`
##SMY:       (see the hedged sketch at the end of this file).
"""
import os
# UI text
TITLE = "Parser2md (PyPDFmd/ParserPDF) – PDF & HTML ↔ Markdown Converter"
DESCRIPTION = (
    "Parser2md (PyPDFmd) - Convert files to Markdown."
)
DESCRIPTION_PDF_HTML = (
    "Upload one or more PDF or HTML files, a folder, or an entire directory tree "
    "to convert to Markdown."
)
DESCRIPTION_PDF = (
    "Drag‑and‑drop a single PDF, a folder of PDFs, or an entire directory tree "
    "to convert to Markdown."
)
DESCRIPTION_HTML = (
    "Drag‑and‑drop a single HTML file, a folder of HTML files, or an entire directory tree "
    "to convert to Markdown."
)
DESCRIPTION_MD = (
    "Upload Markdown/LaTeX files and generate a polished PDF."
)
##SMY: See config.ini
##===================
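
## Sketch only: one way the config.ini referenced above might be read with the
## standard-library configparser. The file name, the "marker" section and its
## keys are assumptions inferred from the commented-out block below, not a
## confirmed layout; adjust to the real config.ini before relying on it.
import configparser

_ini = configparser.ConfigParser()
_ini.read("config.ini")  # leaves the parser empty (no error) if the file is missing

MARKER_MODEL_ID = _ini.get("marker", "model_id", fallback="openai/gpt-oss-120b")
MARKER_MAX_TOKENS = _ini.getint("marker", "max_tokens", fallback=8192)
MARKER_TEMPERATURE = _ini.getfloat("marker", "temperature", fallback=0.2)
MARKER_STREAM = _ini.getboolean("marker", "stream", fallback=True)
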
'''
# Conversion defaults
DEFAULT_MARKER_OPTIONS = {
    "include_images": True,
    "image_format": "png",
}

# Configuration
MAX_WORKERS = int(os.getenv("MAX_WORKERS", "4"))
MAX_RETRIES = int(os.getenv("MAX_RETRIES", "2"))  #3
INPUT_DIR = os.getenv("INPUT_DIR", "inputs")  # unused
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "md_output")
# bool(os.getenv(...)) is True for any non-empty string, so parse the flag text explicitly
USE_LLM = os.getenv("USE_LLM", "false").lower() in ("1", "true", "yes")  #True
EXTRACT_IMAGES = os.getenv("EXTRACT_IMAGES", "true").lower() in ("1", "true", "yes")  #True
OUTPUT_IMAGE_FORMAT = os.getenv("OUTPUT_IMAGE_FORMAT", "png")  #png
OUTPUT_ENCODING = os.getenv("OUTPUT_ENCODING", "utf-8")  #utf-8
DEBUG_DATA_FOLDER = os.getenv("DEBUG_DATA_FOLDER", "debug_data")  #debug_data
# Global
HF_MODEL = os.getenv("HF_MODEL", "gpt2")  # swap for a chat-capable model
HF_TOKEN = os.getenv("HF_TOKEN")  # your Hugging Face token

## //TODO:
# from config.ini ##SMY: future plan to merge
api_token = "a1b2c3"
OUTPUT_FORMAT = "markdown"  #output_format
OPENAI_MODEL = "openai/gpt-oss-120b"  #openai_model
OPENAI_API_KEY = "a1b2c3"  #openai_api_key
OPENAI_BASE_URL = "https://router.huggingface.co/v1"  #openai_base_url
OPENAI_IMAGE_FORMAT = "webp"  #openai_image_format
OUTPUT_IMAGE_FORMAT = "png"  # NOTE: duplicates/overrides the env-based value above
#max_retries=3
#[marker]
PROVIDER = "openai"  #provider
MODEL_ID = "openai/gpt-oss-120b"  #model_id
HF_PROVIDER = "fireworks-ai"  #hf_provider
ENDPOINT_URL = ""  #endpoint_url
BACKEND_CHOICE = "provider"  #backend_choice
SYSTEM_MESSAGE = ""  #system_message
MAX_TOKENS = 8192  #max_tokens
TEMPERATURE = 0.2  #temperature
TOP_P = 0.2  #top_p
STREAM = True  #stream

# Globals within each worker process
hf_client = None
artifact_dict = None
pdf_converter = None
html_converter = None
'''
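
##=============================================================================
## Sketch for the ##SMY TODO at the top of this file: a Settings(BaseSettings)
## class built on pydantic-settings. This is an illustrative assumption, not
## the shipped implementation: it requires `pip install pydantic-settings`, and
## the field names simply mirror the constants in the commented-out block above.
##=============================================================================
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Typed, environment-driven replacement for the module-level constants."""

    # Values come from the process environment and an optional .env file;
    # unknown variables are ignored instead of raising validation errors.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    max_workers: int = 4
    max_retries: int = 2
    output_dir: str = "md_output"
    use_llm: bool = False           # pydantic parses "true"/"false"/"1"/"0" safely
    extract_images: bool = True
    output_image_format: str = "png"
    output_encoding: str = "utf-8"
    hf_model: str = "gpt2"
    hf_token: Optional[str] = None


## Usage (not executed here): `settings = Settings()` reads MAX_WORKERS, USE_LLM,
## HF_TOKEN, etc. from the environment with the defaults above as fallbacks,
## e.g. `settings.max_workers`.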