# Packaging, dependency and tooling configuration for the doc_redaction app.

[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "doc_redaction"
version = "1.5.2"
description = "Redact PDF/image-based documents, Word, or CSV/XLSX files using a Gradio-based GUI interface"
readme = "README.md"
requires-python = ">=3.10"
# Core runtime dependencies. Two packages (the spaCy model and the image
# annotation component) are installed from direct GitHub release URLs rather
# than from a package index.
dependencies = [
    "pdfminer.six==20250506",
    "pdf2image==1.17.0",
    "pymupdf==1.26.4",
    "opencv-python==4.12.0.88",
    "presidio_analyzer==2.2.360",
    "presidio_anonymizer==2.2.360",
    "presidio-image-redactor==0.0.57",
    "pikepdf==9.11.0",
    "pandas==2.3.3",
    "scikit-learn==1.7.2",
    "spacy==3.8.7",
    "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
    "gradio==5.49.1",
    "boto3==1.40.57",
    "pyarrow==21.0.0",
    "openpyxl==3.1.5",
    "Faker==37.8.0",
    "python-levenshtein==0.27.1",
    "spaczz==0.6.1",
    "gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl",
    "rapidfuzz==3.14.1",
    "python-dotenv==1.0.1",
    "awslambdaric==3.1.1",
    "python-docx==1.2.0",
    "polars==1.33.1",
    "defusedxml==0.7.1",
    "numpy==2.2.6",
]

[project.urls]
Homepage = "https://seanpedrick-case.github.io/doc_redaction/"
Repository = "https://github.com/seanpedrick-case/doc_redaction"

[project.optional-dependencies]
# For testing
dev = ["pytest"]
test = ["pytest", "pytest-cov"]

# To install the app with PaddleOCR and VLM support, run one of the following
# from the base folder, inside the correct Python environment:
#   pip install ".[paddle,vlm]"
#   uv pip install ".[paddle,vlm]"   (if using uv)
# Note: the GPU builds of Paddle and Torch are not pulled in by these extras;
# see the manual install notes on each extra below.

# Extra dependencies for PaddleOCR.
# If you want the GPU-accelerated version, install it manually:
#   pip install "paddlepaddle-gpu<=3.2.1" -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
paddle = [
    "paddlepaddle>=3.0.0,<=3.2.1",
    "paddleocr==3.3.0",
]

# Extra dependencies for VLM models.
# For CUDA support, torch needs to be installed manually with
#   --index-url https://download.pytorch.org/whl/cu126
# (the same cu126 build family as the PaddleOCR GPU wheel index above).
vlm = [
    "torch>=2.5.1,<=2.8.0",
    "torchvision>=0.20.1",
    "transformers==4.57.1",
    "accelerate==1.11.0",
]

# Configuration for Ruff linter:
[tool.ruff]
line-length = 88

[tool.ruff.lint]
select = ["E", "F", "I"]
ignore = [
    "E501", # line-too-long (handled with Black)
    "E402", # module-import-not-at-top-of-file (sometimes needed for conditional imports)
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"] # Allow unused imports in __init__.py

# Configuration for a Black formatter:
[tool.black]
line-length = 88
target-version = ["py310"]

# Configuration for pytest:
[tool.pytest.ini_options]
# Filter format is action:message:category:module. Every entry below leaves
# the message field empty and names a module in the last field.
# NOTE(review): "builtin type" sits in the module field but reads like the
# start of a warning message; it may have been intended as
# "ignore:builtin type:DeprecationWarning" — confirm before changing.
filterwarnings = [
    "ignore::DeprecationWarning:click.parser",
    "ignore::DeprecationWarning:weasel.util.config",
    "ignore::DeprecationWarning:builtin type",
    "ignore::DeprecationWarning:websockets.legacy",
    "ignore::DeprecationWarning:websockets.server",
    "ignore::DeprecationWarning:spacy.cli._util",
    "ignore::DeprecationWarning:importlib._bootstrap",
]
testpaths = ["test"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
    "-v",
    "--tb=short",
    "--strict-markers",
    "--disable-warnings",
]