seanpedrickcase committed on
Commit
40bd54b
·
1 Parent(s): f5146c7

Cleaned requirements.txt file

Browse files
Files changed (1) hide show
  1. requirements.txt +41 -33
requirements.txt CHANGED
@@ -1,47 +1,55 @@
1
- pdfminer.six==20250506
2
- pdf2image==1.17.0
3
- pymupdf==1.26.4
4
- opencv-python==4.12.0.88
5
- presidio_analyzer==2.2.360
6
- presidio_anonymizer==2.2.360
7
- presidio-image-redactor==0.0.57
8
- pikepdf==9.11.0
9
  pandas==2.3.3
10
- scikit-learn==1.7.2
11
- spacy==3.8.7
12
- en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
13
- gradio==5.49.1
14
  polars==1.33.1
15
- boto3==1.40.57
16
  pyarrow==21.0.0
17
  openpyxl==3.1.5
 
 
 
18
  Faker==37.8.0
19
  python-levenshtein==0.27.1
20
- spaczz==0.6.1
21
- https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl # This version includes rotation, image zoom, and default labels, as well as the option to include id for annotation boxes
22
  rapidfuzz==3.14.1
23
- python-dotenv==1.0.1
24
- awslambdaric==3.1.1
25
- python-docx==1.2.0
26
- defusedxml==0.7.1
27
- # Test dependencies
28
- pytest>=7.0.0
29
- pytest-cov>=4.0.0
30
- spaces==0.42.1
31
- # paddleOCR if you want to use better quality local text extraction
32
- #paddlepaddle==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/ # CPU version
33
- # Below compatible with CUDA 12.6. See this for more details: # https://www.paddlepaddle.org.cn/documentation/docs/en/install/pip/linux-pip_en.html#span-id-gpu-gpu-version-of-paddlepaddle-span
34
- paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
35
- #https://paddle-whl.bj.bcebos.com/stable/cu126/paddlepaddle-gpu/paddlepaddle_gpu-3.2.1-cp311-cp311-win_amd64.whl
36
- paddleocr==3.3.0
37
- # Install the following to run VLMs
38
- torch==2.6.0 torchvision==0.21 --index-url https://download.pytorch.org/whl/cu126
39
  transformers==4.57.1
40
  accelerate==1.11.0
41
- numpy==2.2.6
42
- #flash-attn==2.8.3 # Commented out as only compatible with Linux systems
43
 
 
 
 
 
44
 
 
 
 
 
45
 
 
 
 
 
 
 
 
 
 
46
 
 
 
 
 
47
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- Core and data packages ---
2
+ numpy==2.2.6
 
 
 
 
 
 
3
  pandas==2.3.3
 
 
 
 
4
  polars==1.33.1
 
5
  pyarrow==21.0.0
6
  openpyxl==3.1.5
7
+ boto3==1.40.57
8
+ python-dotenv==1.0.1
9
+ defusedxml==0.7.1
10
  Faker==37.8.0
11
  python-levenshtein==0.27.1
 
 
12
  rapidfuzz==3.14.1
13
+
14
+ # --- Machine learning / NLP ---
15
+ scikit-learn==1.7.2
16
+ spacy==3.8.7
17
+ spaczz==0.6.1
18
+ en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
 
 
 
 
 
 
 
 
 
 
19
  transformers==4.57.1
20
  accelerate==1.11.0
 
 
21
 
22
+ # --- PyTorch (CUDA 12.6) ---
23
+ --extra-index-url https://download.pytorch.org/whl/cu126
24
+ torch==2.6.0
25
+ torchvision==0.21
26
 
27
+ # --- PaddleOCR (CUDA 12.6) ---
28
+ --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
29
+ paddlepaddle-gpu==3.2.0
30
+ paddleocr==3.3.0
31
 
32
+ # --- PDF / OCR / Redaction tools ---
33
+ pdfminer.six==20250506
34
+ pdf2image==1.17.0
35
+ pymupdf==1.26.4
36
+ pikepdf==9.11.0
37
+ opencv-python==4.12.0.88
38
+ presidio_analyzer==2.2.360
39
+ presidio_anonymizer==2.2.360
40
+ presidio-image-redactor==0.0.57
41
 
42
+ # --- Gradio and apps ---
43
+ gradio==5.49.1
44
+ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl # Custom annotator version with rotation, zoom, labels, and box IDs
45
+ spaces==0.42.1
46
 
47
+ # --- AWS Lambda runtime ---
48
+ awslambdaric==3.1.1
49
+
50
+ # --- Document generation ---
51
+ python-docx==1.2.0
52
+
53
+ # --- Testing ---
54
+ pytest>=7.0.0
55
+ pytest-cov>=4.0.0