Commit
·
1935c45
1
Parent(s):
e59fbb7
Updated requirements
Browse files
- Dockerfile +1 -1
- app.py +1 -0
- pre-requirements.txt +4 -0
- pyproject.toml +2 -2
- requirements.txt +17 -21
Dockerfile
CHANGED
|
@@ -29,7 +29,7 @@ RUN if [ "$INSTALL_PADDLEOCR" = "True" ]; then \
|
|
| 29 |
fi
|
| 30 |
|
| 31 |
RUN if [ "$INSTALL_VLM" = "True" ]; then \
|
| 32 |
-
pip install --verbose --no-cache-dir --target=/install torch==2.
|
| 33 |
pip install --verbose --no-cache-dir --target=/install transformers==4.57.1 accelerate==1.11.0 bitsandbytes==0.48.1; \
|
| 34 |
fi
|
| 35 |
|
|
|
|
| 29 |
fi
|
| 30 |
|
| 31 |
RUN if [ "$INSTALL_VLM" = "True" ]; then \
|
| 32 |
+
pip install --verbose --no-cache-dir --target=/install torch==2.8.0 torchvision==0.24.0 --index-url https://download.pytorch.org/whl/cu126; \
|
| 33 |
pip install --verbose --no-cache-dir --target=/install transformers==4.57.1 accelerate==1.11.0 bitsandbytes==0.48.1; \
|
| 34 |
fi
|
| 35 |
|
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
import gradio as gr
|
|
|
|
| 1 |
import os
|
| 2 |
+
import spaces
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
import gradio as gr
|
pre-requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- PyTorch (CUDA 12.6) ---
|
| 2 |
+
--extra-index-url https://download.pytorch.org/whl/cu126
|
| 3 |
+
torch>=2.5.1, <=2.8.0
|
| 4 |
+
torchvision>=0.20.1, <=0.24.0
|
pyproject.toml
CHANGED
|
@@ -61,8 +61,8 @@ paddle = [
|
|
| 61 |
|
| 62 |
# Extra dependencies for VLM models
|
| 63 |
vlm = [
|
| 64 |
-
"torch==2.
|
| 65 |
-
"torchvision==0.
|
| 66 |
"transformers==4.57.1",
|
| 67 |
"accelerate==1.11.0",
|
| 68 |
]
|
|
|
|
| 61 |
|
| 62 |
# Extra dependencies for VLM models
|
| 63 |
vlm = [
|
| 64 |
+
"torch==2.8.0", # should use --index-url https://download.pytorch.org/whl/cu126 for cuda support for paddleocr, need to install manually
|
| 65 |
+
"torchvision==0.24.0",
|
| 66 |
"transformers==4.57.1",
|
| 67 |
"accelerate==1.11.0",
|
| 68 |
]
|
requirements.txt
CHANGED
|
@@ -11,24 +11,6 @@ Faker==37.8.0
|
|
| 11 |
python-levenshtein==0.27.1
|
| 12 |
rapidfuzz==3.14.1
|
| 13 |
|
| 14 |
-
# --- Machine learning / NLP ---
|
| 15 |
-
scikit-learn==1.7.2
|
| 16 |
-
spacy==3.8.7
|
| 17 |
-
spaczz==0.6.1
|
| 18 |
-
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
|
| 19 |
-
transformers==4.57.1
|
| 20 |
-
accelerate==1.11.0
|
| 21 |
-
|
| 22 |
-
# --- PyTorch (CUDA 12.6) ---
|
| 23 |
-
--extra-index-url https://download.pytorch.org/whl/cu126
|
| 24 |
-
torch
|
| 25 |
-
torchvision
|
| 26 |
-
|
| 27 |
-
# --- PaddleOCR (CUDA 12.6) ---
|
| 28 |
-
--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
|
| 29 |
-
paddlepaddle-gpu==3.2.1
|
| 30 |
-
paddleocr==3.3.0
|
| 31 |
-
|
| 32 |
# --- PDF / OCR / Redaction tools ---
|
| 33 |
pdfminer.six==20250506
|
| 34 |
pdf2image==1.17.0
|
|
@@ -39,6 +21,9 @@ presidio_analyzer==2.2.360
|
|
| 39 |
presidio_anonymizer==2.2.360
|
| 40 |
presidio-image-redactor==0.0.57
|
| 41 |
|
|
|
|
|
|
|
|
|
|
| 42 |
# --- Gradio and apps ---
|
| 43 |
gradio==5.49.1
|
| 44 |
https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl # Custom annotator version with rotation, zoom, labels, and box IDs
|
|
@@ -47,9 +32,20 @@ spaces==0.42.1
|
|
| 47 |
# --- AWS Lambda runtime ---
|
| 48 |
awslambdaric==3.1.1
|
| 49 |
|
| 50 |
-
# ---
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# --- Testing ---
|
| 54 |
pytest>=7.0.0
|
| 55 |
-
pytest-cov>=4.0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
python-levenshtein==0.27.1
|
| 12 |
rapidfuzz==3.14.1
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# --- PDF / OCR / Redaction tools ---
|
| 15 |
pdfminer.six==20250506
|
| 16 |
pdf2image==1.17.0
|
|
|
|
| 21 |
presidio_anonymizer==2.2.360
|
| 22 |
presidio-image-redactor==0.0.57
|
| 23 |
|
| 24 |
+
# --- Document generation ---
|
| 25 |
+
python-docx==1.2.0
|
| 26 |
+
|
| 27 |
# --- Gradio and apps ---
|
| 28 |
gradio==5.49.1
|
| 29 |
https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl # Custom annotator version with rotation, zoom, labels, and box IDs
|
|
|
|
| 32 |
# --- AWS Lambda runtime ---
|
| 33 |
awslambdaric==3.1.1
|
| 34 |
|
| 35 |
+
# --- Machine learning / NLP ---
|
| 36 |
+
scikit-learn==1.7.2
|
| 37 |
+
spacy==3.8.7
|
| 38 |
+
spaczz==0.6.1
|
| 39 |
+
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
|
| 40 |
|
| 41 |
# --- Testing ---
|
| 42 |
pytest>=7.0.0
|
| 43 |
+
pytest-cov>=4.0.0
|
| 44 |
+
|
| 45 |
+
transformers==4.57.1
|
| 46 |
+
accelerate==1.11.0
|
| 47 |
+
|
| 48 |
+
# --- PaddleOCR (CUDA 12.6) ---
|
| 49 |
+
--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
|
| 50 |
+
paddlepaddle-gpu>=3.0.0, <=3.3.0
|
| 51 |
+
paddleocr<=3.3.0
|