[marker]
provider=openai
#model_id=openai/gpt-oss-120b
## Marker will return "LLM did not return a valid response" if the model is not 'Image-Text-to-Text',
## because OpenAI inference fails with: Error code: 400 ... "Unsupported ChatMessageContent type: image_url"
## Note that Marker works pretty well using its own transformer-based model without an LLM
model_id=meta-llama/Llama-4-Maverick-17B-128E-Instruct
hf_provider=fireworks-ai
endpoint_url=""
backend_choice=provider
system_message=""
max_tokens=8192
temperature=0.2
top_p=0.2
stream=True
api_token=a1b2c3
openai_model=openai/gpt-oss-120b
openai_api_key=a1b2c3
openai_base_url=https://router.huggingface.co/v1
openai_image_format=webp
#max_retries=3
#[Configuration]
use_llm=True
output_format=markdown
input_dir=inputs
output_dir=output_md
max_workers=4
max_retries=2
extract_images=True
output_image_format=png
output_encoding=utf-8
debug_data_folder=debug_data
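## A minimal sketch (not part of the app) of reading the section above with Python's
## configparser; the section and key names come from this file, the file name
## "app.config" is an assumption:
###
# from configparser import ConfigParser
# cfg = ConfigParser()
# cfg.read("app.config", encoding="utf-8")
# marker = cfg["marker"]
# max_tokens = marker.getint("max_tokens")      # -> 8192
# temperature = marker.getfloat("temperature")  # -> 0.2
# use_llm = marker.getboolean("use_llm")        # -> True
###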
[unsure]
image_output_dir="images"
image_output_format="png"
base_dir=Path(__file__).resolve().parent.parent
###
# Create a Path object from the current file's location, resolve it to an absolute path,
# and then get its parent's parent using chained .parent calls or the parents[] attribute.
# grandparent_dir = Path(__file__).resolve().parent.parent  # equivalent: os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
###
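###
# Equivalent sketch using the parents[] sequence mentioned above:
# from pathlib import Path
# base_dir = Path(__file__).resolve().parents[1]  # same result as .parent.parent
###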
[libraries]
libobject_path = C:\\Dat\\dev\\gtk3-runtime\\bin
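## A hedged sketch of how libobject_path might be put on the Windows DLL search
## path (os.add_dll_directory, Python 3.8+); the loading code is an assumption,
## only the key and value come from this file:
###
# import os
# os.add_dll_directory(r"C:\Dat\dev\gtk3-runtime\bin")  # value of libobject_path
###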
# from config.ini ##SMY: future plan to merge
[MARKER_CAP]
#[marker]
PROVIDER = openai
#MODEL_ID = openai/gpt-oss-120b
## Marker will return "LLM did not return a valid response" if the model is not 'Image-Text-to-Text',
## because OpenAI inference fails with: Error code: 400 ... "Unsupported ChatMessageContent type: image_url"
## Note that Marker works pretty well using its own transformer-based model without an LLM
MODEL_ID=meta-llama/Llama-4-Maverick-17B-128E-Instruct
HF_PROVIDER = fireworks-ai
ENDPOINT_URL = ""
BACKEND_CHOICE = provider
SYSTEM_MESSAGE = ""
MAX_TOKENS = 8192
TEMPERATURE = 0.2
TOP_P = 0.2
STREAM = True
API_TOKEN = a1b2c3
OPENAI_MODEL = openai/gpt-oss-120b
OPENAI_API_KEY = a1b2c3
OPENAI_BASE_URL = https://router.huggingface.co/v1
OPENAI_IMAGE_FORMAT = webp
#[CONFIGURATION]
MAX_WORKERS = 4
MAX_RETRIES = 2
OUTPUT_FORMAT = markdown
INPUT_DIR = inputs
OUTPUT_DIR = output_md
USE_LLM = False
EXTRACT_IMAGES = True
OUTPUT_IMAGE_FORMAT = png
OUTPUT_ENCODING = utf-8
DEBUG_DATA_FOLDER = debug_data
[UNSURE_CAP]
IMAGE_OUTPUT_DIR = images
IMAGE_OUTPUT_FORMAT = png
BASE_DIR = Path(__file__).resolve().parent.parent
###
# Create a Path object from the current file's location, resolve it to an absolute path,
# and then get its parent's parent using chained .parent calls or the parents[] attribute.
# grandparent_dir = Path(__file__).resolve().parent.parent  # equivalent: os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
###
[LIBRARIES_CAP]
LIBOBJECT_PATH = C:\\Dat\\dev\\gtk3-runtime\\bin
WEASYPRINT_DLL_DIRECTORIES = C:\\Dat\\dev\\gtk3-runtime\\bin
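## WEASYPRINT_DLL_DIRECTORIES is WeasyPrint's documented Windows hook for
## locating the GTK DLLs; a sketch of exporting it, assuming it must be set
## before weasyprint is imported:
###
# import os
# os.environ["WEASYPRINT_DLL_DIRECTORIES"] = r"C:\Dat\dev\gtk3-runtime\bin"
# import weasyprint  # picks up the directory at import time
###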
[GLOBAL_CAP]
# Globals within each worker process
HF_MODEL = "openai/gpt-oss-120b"
HF_TOKEN = ""
HF_CLIENT = None
ARTIFACT_DICT = None
PDF_CONVERTER = None
HTML_CONVERTER = None
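## A sketch of how these per-worker globals might be initialized, using
## marker-pdf's documented create_model_dict/PdfConverter usage and
## huggingface_hub's InferenceClient; the initializer itself is an assumption:
###
# from huggingface_hub import InferenceClient
# from marker.models import create_model_dict
# from marker.converters.pdf import PdfConverter
# def init_worker(hf_model: str, hf_token: str) -> None:
#     global HF_CLIENT, ARTIFACT_DICT, PDF_CONVERTER
#     HF_CLIENT = InferenceClient(model=hf_model, token=hf_token or None)
#     ARTIFACT_DICT = create_model_dict()
#     PDF_CONVERTER = PdfConverter(artifact_dict=ARTIFACT_DICT)
###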
[marker_dict]
## "meta-llama/Llama-4-Maverick-17B-128E-Instruct:fireworks-ai"
provider:"openai" #provider,
model_id:"openai/gpt-oss-120b" #model_id, #"meta-llama/Llama-4-Maverick-17B-128E-Instruct:fireworks-ai"
hf_provider:"fireworks-ai" #hf_provider,
endpoint_url:"" #endpoint_url,
backend_choice:"provider" #backend_choice,
system_message:"" #system_message,
max_tokens:8192 #max_tokens,
temperature:0.2 #temperature,
top_p:0.2 #top_p,
stream:"stream"
api_token:"a1b2c3" #get_token,
output_format:"markdown" #output_format, #"markdown",
openai_model:"openai/gpt-oss-120b" #self.client.model_id, #"model_name"
openai_api_key:"a1b2c3" #self.client.openai_api_key, #self.api_token,
openai_base_url:"https://router.huggingface.co/v1" #self.client.base_url, #self.base_url,
#temperature=self.client.temperature,
#top_p=self.client.top_p,
openai_image_format:"webp" #"png" #better compatibility
max_retries:3 ## pass to __call__
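## The section above mirrors keyword arguments for Marker's LLM service; a
## sketch of the Python dict it appears to encode (consuming code assumed):
###
# marker_kwargs = {
#     "provider": "openai",
#     "model_id": "openai/gpt-oss-120b",
#     "hf_provider": "fireworks-ai",
#     "backend_choice": "provider",
#     "max_tokens": 8192,
#     "temperature": 0.2,
#     "top_p": 0.2,
#     "stream": True,
#     "openai_base_url": "https://router.huggingface.co/v1",
#     "openai_image_format": "webp",
#     "max_retries": 3,
# }
###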
[marker_nostrip]
provider="openai"
model_id="openai/gpt-oss-120b"
hf_provider="fireworks-ai"
endpoint_url=""
backend_choice="provider"
system_message=""
max_tokens=8192
temperature=0.2
top_p=0.2
stream=True
api_token="a1b2c3"
openai_model="openai/gpt-oss-120b"
openai_api_key="a1b2c3"
openai_base_url="https://router.huggingface.co/v1"
openai_image_format="webp"
#max_retries=3
#[Configuration]
use_llm=True
output_format="markdown"
input_dir="inputs"
output_dir="output_md"
max_workers=4
max_retries=2
extract_images=True
output_image_format="png"
output_encoding="utf-8"
debug_data_folder="debug_data"