[marker]
provider=openai
#model_id=openai/gpt-oss-120b
## Marker will return "LLM did not return a valid response" if the model is not 'Image-Text-to-Text',
## because OpenAI inference fails with: Error code: 400 ... "Unsupported ChatMessageContent type: image_url"
## Note that Marker works pretty well using its own transformer-based model without an LLM
model_id=meta-llama/Llama-4-Maverick-17B-128E-Instruct
hf_provider=fireworks-ai
endpoint_url=""
backend_choice=provider
system_message=""
max_tokens=8192
temperature=0.2
top_p=0.2
stream=True
api_token=a1b2c3
openai_model=openai/gpt-oss-120b
openai_api_key=a1b2c3
openai_base_url=https://router.huggingface.co/v1
openai_image_format=webp
#max_retries=3
#[Configuration]
use_llm=True
output_format=markdown
input_dir=inputs
output_dir=output_md
max_workers=4
max_retries=2
extract_images=True
output_image_format=png
output_encoding=utf-8
debug_data_folder=debug_data
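###
## A minimal sketch of loading this section with Python's configparser; the
## filename config.ini and the type coercions are assumptions, not confirmed
## by this file. inline_comment_prefixes is set because the file uses trailing
## "#" comments, which configparser otherwise keeps as part of the value:
# import configparser
#
# config = configparser.ConfigParser(inline_comment_prefixes=("#",))
# config.read("config.ini", encoding="utf-8")
#
# marker = config["marker"]
# model_id = marker.get("model_id")             # plain string
# max_tokens = marker.getint("max_tokens")      # 8192 as int
# temperature = marker.getfloat("temperature")  # 0.2 as float
# use_llm = marker.getboolean("use_llm")        # True as bool
###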
[unsure]
image_output_dir="images"
image_output_format="png"
base_dir=Path(__file__).resolve().parent.parent
###
# Create a Path object from the current file's location, resolve it to an absolute path,
# and then get its parent's parent using chained .parent calls or the parents[] attribute.
#grandparent_dir = Path(__file__).resolve().parent.parent  #os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
###
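###
## For illustration, both spellings below resolve to the same directory
## (a generic pathlib sketch, not specific to this project):
# from pathlib import Path
#
# here = Path(__file__).resolve()
# grandparent_a = here.parent.parent  # chained .parent calls
# grandparent_b = here.parents[1]     # parents[0] is the parent, parents[1] the grandparent
# assert grandparent_a == grandparent_b
###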
[libraries]
libobject_path = C:\\Dat\\dev\\gtk3-runtime\\bin

# from config.ini ##SMY: future plan to merge
[MARKER_CAP]
#[marker]
PROVIDER = openai
#MODEL_ID = openai/gpt-oss-120b
## Marker will return "LLM did not return a valid response" if the model is not 'Image-Text-to-Text',
## because OpenAI inference fails with: Error code: 400 ... "Unsupported ChatMessageContent type: image_url"
## Note that Marker works pretty well using its own transformer-based model without an LLM
MODEL_ID = meta-llama/Llama-4-Maverick-17B-128E-Instruct
HF_PROVIDER = fireworks-ai
ENDPOINT_URL = ""
BACKEND_CHOICE = provider
SYSTEM_MESSAGE = ""
MAX_TOKENS = 8192
TEMPERATURE = 0.2
TOP_P = 0.2
STREAM = True
API_TOKEN = a1b2c3
OPENAI_MODEL = openai/gpt-oss-120b
OPENAI_API_KEY = a1b2c3
OPENAI_BASE_URL = https://router.huggingface.co/v1
OPENAI_IMAGE_FORMAT = webp
#[CONFIGURATION]
MAX_WORKERS = 4
MAX_RETRIES = 2
OUTPUT_FORMAT = markdown
INPUT_DIR = inputs
OUTPUT_DIR = output_dir
USE_LLM = False
EXTRACT_IMAGES = True
OUTPUT_IMAGE_FORMAT = png
OUTPUT_ENCODING = utf-8
DEBUG_DATA_FOLDER = debug_data
[UNSURE_CAP]
IMAGE_OUTPUT_DIR = images
IMAGE_OUTPUT_FORMAT = png
BASE_DIR = Path(__file__).resolve().parent.parent
###
# Create a Path object from the current file's location, resolve it to an absolute path,
# and then get its parent's parent using chained .parent calls or the parents[] attribute.
#grandparent_dir = Path(__file__).resolve().parent.parent  #os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
###
[LIBRARIES_CAP]
LIBOBJECT_PATH = C:\\Dat\\dev\\gtk3-runtime\\bin
WEASYPRINT_DLL_DIRECTORIES = C:\\Dat\\dev\\gtk3-runtime\\bin
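###
## A sketch of how these paths might be applied on Windows before importing
## WeasyPrint, so it can locate the GTK runtime DLLs. The wiring below is an
## assumption; only the path value comes from this file:
# import os
#
# gtk_bin = r"C:\Dat\dev\gtk3-runtime\bin"  # mirrors LIBOBJECT_PATH above
#
# # WeasyPrint consults this environment variable on Windows to find GTK DLLs.
# os.environ["WEASYPRINT_DLL_DIRECTORIES"] = gtk_bin
# os.add_dll_directory(gtk_bin)  # Python 3.8+, Windows only; requires an absolute path
#
# import weasyprint  # import only after the DLL search path is set
###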
[GLOBAL_CAP]
# Globals within each worker process
HF_MODEL = "openai/gpt-oss-120b"
HF_TOKEN = ""
HF_CLIENT = None
ARTIFACT_DICT = None
PDF_CONVERTER = None
HTML_CONVERTER = None
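###
## These names suggest each worker process builds its own client and converters
## once, then reuses them. A minimal sketch of that pattern with a
## ProcessPoolExecutor initializer; the huggingface_hub and marker imports are
## assumptions based on the key names above, not confirmed by this file:
# from concurrent.futures import ProcessPoolExecutor
#
# HF_CLIENT = None
# PDF_CONVERTER = None
#
# def _init_worker(hf_model: str, hf_token: str):
#     """Runs once per worker process; populates module-level globals."""
#     global HF_CLIENT, PDF_CONVERTER
#     from huggingface_hub import InferenceClient
#     from marker.models import create_model_dict
#     from marker.converters.pdf import PdfConverter
#     HF_CLIENT = InferenceClient(model=hf_model, token=hf_token)
#     PDF_CONVERTER = PdfConverter(artifact_dict=create_model_dict())
#
# with ProcessPoolExecutor(max_workers=4, initializer=_init_worker,
#                          initargs=("openai/gpt-oss-120b", "")) as pool:
#     ...  # each submitted task uses the per-process globals
###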
[marker_dict]
## "meta-llama/Llama-4-Maverick-17B-128E-Instruct:fireworks-ai"
provider:"openai"
model_id:"openai/gpt-oss-120b"  #"meta-llama/Llama-4-Maverick-17B-128E-Instruct:fireworks-ai"
hf_provider:"fireworks-ai"
endpoint_url:""
backend_choice:"provider"
system_message:""
max_tokens:8192
temperature:0.2  #temperature=self.client.temperature
top_p:0.2  #top_p=self.client.top_p
stream:True
api_token:"a1b2c3"  #get_token
output_format:"markdown"
openai_model:"openai/gpt-oss-120b"  #self.client.model_id
openai_api_key:"a1b2c3"  #self.client.openai_api_key
openai_base_url:"https://router.huggingface.co/v1"  #self.client.base_url
openai_image_format:"webp"  #"png" for better compatibility
max_retries:3  ## passed to __call__
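###
## The keys above read like a Python dict handed to Marker's LLM service.
## A hedged sketch of assembling it from the [marker] section; the dict shape
## follows this file, while the surrounding wiring is an assumption:
# import configparser
#
# config = configparser.ConfigParser(inline_comment_prefixes=("#",))
# config.read("config.ini", encoding="utf-8")
# m = config["marker"]
#
# marker_dict = {
#     "provider": m.get("provider"),
#     "model_id": m.get("model_id"),
#     "hf_provider": m.get("hf_provider"),
#     "backend_choice": m.get("backend_choice"),
#     "max_tokens": m.getint("max_tokens"),
#     "temperature": m.getfloat("temperature"),
#     "top_p": m.getfloat("top_p"),
#     "stream": m.getboolean("stream"),
#     "openai_model": m.get("openai_model"),
#     "openai_api_key": m.get("openai_api_key"),
#     "openai_base_url": m.get("openai_base_url"),
#     "openai_image_format": m.get("openai_image_format"),
#     "max_retries": 3,  # passed through to __call__ per the note above
# }
###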
[marker_nostrip]
provider="openai"
model_id="openai/gpt-oss-120b"
hf_provider="fireworks-ai"
endpoint_url=""
backend_choice="provider"
system_message=""
max_tokens=8192
temperature=0.2
top_p=0.2
stream=True
api_token="a1b2c3"
openai_model="openai/gpt-oss-120b"
openai_api_key="a1b2c3"
openai_base_url="https://router.huggingface.co/v1"
openai_image_format="webp"
#max_retries=3
#[Configuration]
use_llm=True
output_format="markdown"
input_dir="inputs"
output_dir="output_md"
max_workers=4
max_retries=2
extract_images=True
output_image_format="png"
output_encoding="utf-8"
debug_data_folder="debug_data"