# grab wheels first, then fall back to PyPI
--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
--prefer-binary  # tell pip "pick wheels if you can"
gradio>=5.41.0
fastapi>=0.116
uvicorn[standard]>=0.35
huggingface-hub>=0.22
# use a version that actually has a pre-built CPU wheel
llama-cpp-python==0.3.2