Spaces:
Running
on
Zero
Running
on
Zero
baseline08_beta0.3.4_03Oct25: fixing slow Marker PdfConverter(); moved @spaces.GPU to pool.map(); minor tweaks/fixes
Browse files
- converters/extraction_converter.py +2 -0
- requirements.txt +2 -2
- ui/gradio_ui.py +3 -2
converters/extraction_converter.py
CHANGED
|
@@ -168,6 +168,8 @@ class DocumentConverter:
|
|
| 168 |
#config=config_parser.generate_config_dict(),
|
| 169 |
#llm_service=self.llm_service ##SMY expecting str but self.llm_service, is service object marker.services of type BaseServices
|
| 170 |
llm_service=llm_service_str, ##resolve
|
|
|
|
|
|
|
| 171 |
)
|
| 172 |
|
| 173 |
logger.log(level=20, msg="✔️ MarkerConverter instantiated successfully:", extra={"converter.config": str(self.converter.config.get("openai_base_url")), "use_llm":self.converter.use_llm})
|
|
|
|
| 168 |
#config=config_parser.generate_config_dict(),
|
| 169 |
#llm_service=self.llm_service ##SMY expecting str but self.llm_service, is service object marker.services of type BaseServices
|
| 170 |
llm_service=llm_service_str, ##resolve
|
| 171 |
+
processor_list=config_parser.get_processors(),
|
| 172 |
+
renderer=config_parser.get_renderer(),
|
| 173 |
)
|
| 174 |
|
| 175 |
logger.log(level=20, msg="✔️ MarkerConverter instantiated successfully:", extra={"converter.config": str(self.converter.config.get("openai_base_url")), "use_llm":self.converter.use_llm})
|
requirements.txt
CHANGED
|
@@ -6,8 +6,8 @@ gradio>=5.44.0 # gradio[mcp]>=5.44.0
|
|
| 6 |
|
| 7 |
## HF Spaces recommendation: https://huggingface.co/docs/hub/spaces-gpus#frameworks
|
| 8 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
| 9 |
-
torch
|
| 10 |
-
#torch>=2.7.
|
| 11 |
spaces>=0.42.1 # HF Spaces (default on HF Spaces
|
| 12 |
#huggingface_hub>=0.34.0 # HuggingFace integration
|
| 13 |
|
|
|
|
| 6 |
|
| 7 |
## HF Spaces recommendation: https://huggingface.co/docs/hub/spaces-gpus#frameworks
|
| 8 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
| 9 |
+
torch==2.7.0 # torch
|
| 10 |
+
#torch>=2.7.0 # ZeroGPU support
|
| 11 |
spaces>=0.42.1 # HF Spaces (default on HF Spaces
|
| 12 |
#huggingface_hub>=0.34.0 # HuggingFace integration
|
| 13 |
|
ui/gradio_ui.py
CHANGED
|
@@ -59,7 +59,7 @@ except Exception as exc:
|
|
| 59 |
# pool executor to convert files called by Gradio
|
| 60 |
##SMY: TODO: future: refactor to gradio_process.py and
|
| 61 |
## pull options to cli-options{"output_format":, "output_dir_string":, "use_llm":, "page_range":, "force_ocr":, "debug":, "strip_existing_ocr":, "disable_ocr_math""}
|
| 62 |
-
|
| 63 |
def convert_batch(
|
| 64 |
pdf_files, #: list[str],
|
| 65 |
pdf_files_count: int,
|
|
@@ -261,6 +261,7 @@ def convert_batch(
|
|
| 261 |
#progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 262 |
#progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 263 |
#time.sleep(0.25)'''
|
|
|
|
| 264 |
def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
|
| 265 |
#Use progress.tqdm to integrate with the executor map
|
| 266 |
#results = pool.map(pdf2md_converter.convert_files, pdf_files) ##SMY iterables #max_retries #output_dir_string)
|
|
@@ -683,7 +684,7 @@ def build_interface() -> gr.Blocks:
|
|
| 683 |
with gr.Column():
|
| 684 |
page_range_tb = gr.Textbox(
|
| 685 |
label="Page Range (Optional)",
|
| 686 |
-
value=0,
|
| 687 |
placeholder="Example: 0,1-5,8,12-15 ~(default: first page)",
|
| 688 |
lines=1,
|
| 689 |
max_lines=1,
|
|
|
|
| 59 |
# pool executor to convert files called by Gradio
|
| 60 |
##SMY: TODO: future: refactor to gradio_process.py and
|
| 61 |
## pull options to cli-options{"output_format":, "output_dir_string":, "use_llm":, "page_range":, "force_ocr":, "debug":, "strip_existing_ocr":, "disable_ocr_math""}
|
| 62 |
+
#@spaces.GPU
|
| 63 |
def convert_batch(
|
| 64 |
pdf_files, #: list[str],
|
| 65 |
pdf_files_count: int,
|
|
|
|
| 261 |
#progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 262 |
#progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 263 |
#time.sleep(0.25)'''
|
| 264 |
+
@spaces.GPU ## HF Spaces GPU support
|
| 265 |
def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
|
| 266 |
#Use progress.tqdm to integrate with the executor map
|
| 267 |
#results = pool.map(pdf2md_converter.convert_files, pdf_files) ##SMY iterables #max_retries #output_dir_string)
|
|
|
|
| 684 |
with gr.Column():
|
| 685 |
page_range_tb = gr.Textbox(
|
| 686 |
label="Page Range (Optional)",
|
| 687 |
+
value="0-0",
|
| 688 |
placeholder="Example: 0,1-5,8,12-15 ~(default: first page)",
|
| 689 |
lines=1,
|
| 690 |
max_lines=1,
|