Spaces:

seanpedrickcase
/

document_redaction

Running

App Files Files Community

document_redaction / pyproject.toml

seanpedrickcase

Updated packages. Redaction examples should now update cost/estimated time taken display on click

02bcbaf about 5 hours ago

raw

history blame contribute delete

3.45 kB

	# Build backend: the package is built by setuptools.
	[build-system]
	build-backend = "setuptools.build_meta"
	requires = [
		"setuptools>=61.0",
		"wheel",
	]

	# Core package metadata. The parent table is declared before its
	# `project.urls` sub-table so the file reads top-down.
	[project]
	name = "doc_redaction"
	version = "1.5.2"
	description = "Redact PDF/image-based documents, Word, or CSV/XLSX files using a Gradio-based GUI interface"
	readme = "README.md"
	requires-python = ">=3.10"
	# Runtime requirements, pinned to exact versions. The two `name @ URL`
	# entries are direct references to release artifacts hosted on GitHub.
	dependencies = [
		"pdfminer.six==20251107",
		"pdf2image==1.17.0",
		"pymupdf==1.26.6",
		"opencv-python==4.12.0.88",
		"presidio_analyzer==2.2.360",
		"presidio_anonymizer==2.2.360",
		"presidio-image-redactor==0.0.57",
		"pikepdf==9.11.0",
		"pandas==2.3.3",
		"scikit-learn==1.7.2",
		"spacy==3.8.7",
		"en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
		"gradio==5.49.1",
		"boto3==1.40.69",
		"pyarrow==21.0.0",
		"openpyxl==3.1.5",
		"Faker==37.8.0",
		"python-levenshtein==0.27.1",
		"spaczz==0.6.1",
		"gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl",
		"rapidfuzz==3.14.1",
		"python-dotenv==1.0.1",
		"awslambdaric==3.1.1",
		"python-docx==1.2.0",
		"polars==1.33.1",
		"defusedxml==0.7.1",
		"numpy==2.2.6",
	]

	[project.urls]
	Homepage = "https://seanpedrick-case.github.io/doc_redaction/"
	Repository = "https://github.com/seanpedrick-case/doc_redaction"

	[project.optional-dependencies]
	# Test tooling.
	dev = ["pytest"]
	test = ["pytest", "pytest-cov"]

	# To install the app with PaddleOCR and VLM support, run one of the
	# following from the base folder, inside the correct Python environment.
	# The requirement is quoted so shells that glob "[...]" do not mangle it.
	#   pip install ".[paddle,vlm]"
	#   uv pip install ".[paddle,vlm]"    (if using uv)
	# Note: GPU support needs the GPU build of Torch; see the `vlm` extra below.

	# Extra dependencies for PaddleOCR.
	# For the GPU-accelerated build, install it manually. Keep the quotes:
	# an unquoted "<" is treated by the shell as input redirection.
	#   pip install "paddlepaddle-gpu<=3.2.1" -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
	paddle = [
		"paddlepaddle>=3.0.0,<=3.2.1",
		"paddleocr==3.3.0",
	]

	# Extra dependencies for VLM models.
	# For CUDA support, torch must be installed manually with
	#   --index-url https://download.pytorch.org/whl/cu126
	# (the same cu126 line as the PaddlePaddle GPU index used for PaddleOCR above).
	vlm = [
		"torch>=2.5.1,<=2.8.0",
		"torchvision>=0.20.1",
		"transformers==4.57.1",
		"accelerate==1.11.0",
	]

	# Ruff linter settings. The line length matches the Black setting in this file.
	[tool.ruff]
	line-length = 88

	[tool.ruff.lint]
	select = [
		"E",
		"F",
		"I",
	]
	ignore = [
		# line-too-long (handled with Black)
		"E501",
		# module-import-not-at-top-of-file (sometimes needed for conditional imports)
		"E402",
	]

	[tool.ruff.lint.per-file-ignores]
	# Unused imports are allowed in __init__.py.
	"__init__.py" = ["F401"]

	# Black formatter settings; the line length is the same value Ruff uses above.
	[tool.black]
	target-version = ["py310"]
	line-length = 88

	# Configuration for pytest:
	[tool.pytest.ini_options]
	# Filter syntax is action:message:category:module. Each entry silences
	# DeprecationWarnings, either by originating module or by message prefix.
	filterwarnings = [
		"ignore::DeprecationWarning:click.parser",
		"ignore::DeprecationWarning:weasel.util.config",
		# "builtin type ..." is warning *message* text, not a module name, so it
		# belongs in the message field; in the module field it can never match.
		# NOTE(review): presumably targets the "builtin type ... has no
		# __module__ attribute" warnings raised by a compiled dependency - confirm.
		"ignore:builtin type:DeprecationWarning",
		"ignore::DeprecationWarning:websockets.legacy",
		"ignore::DeprecationWarning:websockets.server",
		"ignore::DeprecationWarning:spacy.cli._util",
		"ignore::DeprecationWarning:importlib._bootstrap",
	]
	testpaths = ["test"]
	# Glob patterns for test modules. Without the wildcards only files literally
	# named "test_.py" or "_test.py" would be collected.
	python_files = ["test_*.py", "*_test.py"]
	python_classes = ["Test*"]
	python_functions = ["test_*"]
	addopts = [
		"-v",
		"--tb=short",
		"--strict-markers",
		"--disable-warnings",
	]