Spaces:

seanpedrickcase
/

document_redaction

Running

App Files Community

seanpedrickcase committed 9 days ago

Commit

40bd54b

1 Parent(s): f5146c7

Cleaned requirements.txt file

Browse files

Files changed (1) hide show

requirements.txt +41 -33

requirements.txt CHANGED Viewed

@@ -1,47 +1,55 @@
-pdfminer.six==20250506
-pdf2image==1.17.0
-pymupdf==1.26.4
-opencv-python==4.12.0.88
-presidio_analyzer==2.2.360
-presidio_anonymizer==2.2.360
-presidio-image-redactor==0.0.57
-pikepdf==9.11.0
 pandas==2.3.3
-scikit-learn==1.7.2
-spacy==3.8.7
-en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
-gradio==5.49.1
 polars==1.33.1
-boto3==1.40.57
 pyarrow==21.0.0
 openpyxl==3.1.5
 Faker==37.8.0
 python-levenshtein==0.27.1
-spaczz==0.6.1
-https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl # This version includes rotation, image zoom, and default labels, as well as the option to include id for annotation boxes
 rapidfuzz==3.14.1
-python-dotenv==1.0.1
-awslambdaric==3.1.1
-python-docx==1.2.0
-defusedxml==0.7.1
-# Test dependencies
-pytest>=7.0.0
-pytest-cov>=4.0.0
-spaces==0.42.1
-# paddleOCR if you want to use better quality local text extraction
-#paddlepaddle==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ # CPU version
-# Below compatible with CUDA 12.6. See this for more details: # https://www.paddlepaddle.org.cn/documentation/docs/en/install/pip/linux-pip_en.html#span-id-gpu-gpu-version-of-paddlepaddle-span
-paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
-#https://paddle-whl.bj.bcebos.com/stable/cu126/paddlepaddle-gpu/paddlepaddle_gpu-3.2.1-cp311-cp311-win_amd64.whl
-paddleocr==3.3.0
-# Install the following to run VLMs
-torch==2.6.0 torchvision==0.21 --index-url https://download.pytorch.org/whl/cu126
 transformers==4.57.1
 accelerate==1.11.0
-numpy==2.2.6
-#flash-attn==2.8.3 # Commented out as only compatible with Linux systems

+# --- Core and data packages ---
+numpy==2.2.6
 pandas==2.3.3
 polars==1.33.1
 pyarrow==21.0.0
 openpyxl==3.1.5
+boto3==1.40.57
+python-dotenv==1.0.1
+defusedxml==0.7.1
 Faker==37.8.0
 python-levenshtein==0.27.1
 rapidfuzz==3.14.1
+# --- Machine learning / NLP ---
+scikit-learn==1.7.2
+spacy==3.8.7
+spaczz==0.6.1
+en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
 transformers==4.57.1
 accelerate==1.11.0
+# --- PyTorch (CUDA 12.6) ---
+--extra-index-url https://download.pytorch.org/whl/cu126
+torch==2.6.0
+torchvision==0.21
+# --- PaddleOCR (CUDA 12.6) ---
+--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
+paddlepaddle-gpu==3.2.0
+paddleocr==3.3.0
+# --- PDF / OCR / Redaction tools ---
+pdfminer.six==20250506
+pdf2image==1.17.0
+pymupdf==1.26.4
+pikepdf==9.11.0
+opencv-python==4.12.0.88
+presidio_analyzer==2.2.360
+presidio_anonymizer==2.2.360
+presidio-image-redactor==0.0.57
+# --- Gradio and apps ---
+gradio==5.49.1
+https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl  # Custom annotator version with rotation, zoom, labels, and box IDs
+spaces==0.42.1
+# --- AWS Lambda runtime ---
+awslambdaric==3.1.1
+# --- Document generation ---
+python-docx==1.2.0
+# --- Testing ---
+pytest>=7.0.0
+pytest-cov>=4.0.0