# Build configuration: the package is built through the setuptools backend.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=61.0",
    "wheel",
]

# Core package metadata. The parent [project] table is declared before its
# [project.urls] sub-table so the file reads top-down.
[project]
name = "doc_redaction"
version = "1.5.2"
description = "Redact PDF/image-based documents, Word, or CSV/XLSX files using a Gradio-based GUI interface"
readme = "README.md"
requires-python = ">=3.10"
# Runtime dependencies are pinned to exact versions.
dependencies = [
    "pdfminer.six==20250506",
    "pdf2image==1.17.0",
    "pymupdf==1.26.4",
    "opencv-python==4.12.0.88",
    "presidio_analyzer==2.2.360",
    "presidio_anonymizer==2.2.360",
    "presidio-image-redactor==0.0.57",
    "pikepdf==9.11.0",
    "pandas==2.3.3",
    "scikit-learn==1.7.2",
    "spacy==3.8.7",
    # spaCy English model, installed from a direct URL to the GitHub release tarball.
    "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
    "gradio==5.49.1",
    "boto3==1.40.57",
    "pyarrow==21.0.0",
    "openpyxl==3.1.5",
    "Faker==37.8.0",
    "python-levenshtein==0.27.1",
    "spaczz==0.6.1",
    # Gradio image annotation component, installed from a direct URL to a GitHub release wheel.
    "gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl",
    "rapidfuzz==3.14.1",
    "python-dotenv==1.0.1",
    "awslambdaric==3.1.1",
    "python-docx==1.2.0",
    "polars==1.33.1",
    "defusedxml==0.7.1",
    "numpy==2.2.6",
]

[project.urls]
Homepage = "https://seanpedrick-case.github.io/doc_redaction/"
Repository = "https://github.com/seanpedrick-case/doc_redaction"

[project.optional-dependencies]
# For testing
dev = ["pytest"]
test = ["pytest", "pytest-cov"]

# To install the app with PaddleOCR and VLM support, run one of the following
# from the base folder, in the correct Python environment:
#   pip install ".[paddle,vlm]"
#   uv pip install ".[paddle,vlm]"
# Note: the GPU version of Torch needs a manual install; see the VLM note below.

# Extra dependencies for PaddleOCR.
# For the GPU-accelerated version, install it manually (the quotes stop the
# shell from treating "<" as a redirect):
#   pip install "paddlepaddle-gpu<=3.2.1" -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
paddle = [
    "paddlepaddle>=3.0.0,<=3.2.1",
    "paddleocr==3.3.0",
]

# Extra dependencies for VLM models.
# For CUDA support (e.g. alongside the GPU build of PaddleOCR), torch must be
# installed manually with --index-url https://download.pytorch.org/whl/cu126
vlm = [
    "torch>=2.5.1,<=2.8.0",
    "torchvision>=0.20.1",
    "transformers==4.57.1",
    "accelerate==1.11.0",
]

# Ruff linter settings.
[tool.ruff]
line-length = 88

[tool.ruff.lint]
select = [
    "E",
    "F",
    "I",
]
# E501: line-too-long (handled with Black)
# E402: module-import-not-at-top-of-file (sometimes needed for conditional imports)
ignore = ["E501", "E402"]

# Allow unused imports in __init__.py
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

# Black formatter settings (line length matches the Ruff setting of 88).
[tool.black]
target-version = ["py310"]
line-length = 88

# Configuration for pytest:
[tool.pytest.ini_options]
# Each filter has the form "action:message:category:module". The message and
# module fields are regular expressions matched from the start of the warning
# text and of the module name respectively.
filterwarnings = [
    "ignore::DeprecationWarning:click.parser",
    "ignore::DeprecationWarning:weasel.util.config",
    # "builtin type" is warning text, not a module name, so it goes in the
    # message field. Presumably this targets the SWIG
    # "builtin type ... has no __module__ attribute" warnings - confirm.
    "ignore:builtin type:DeprecationWarning",
    "ignore::DeprecationWarning:websockets.legacy",
    "ignore::DeprecationWarning:websockets.server",
    "ignore::DeprecationWarning:spacy.cli._util",
    "ignore::DeprecationWarning:importlib._bootstrap",
]
testpaths = ["test"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
    "-v",
    "--tb=short",
    # Unregistered markers raise an error instead of being silently accepted.
    "--strict-markers",
    # Hides the warnings summary at the end of the run.
    "--disable-warnings",
]