# Page header captured together with this file (not Compose configuration):
#   Elias-Schwegler's picture
#   Upload folder using huggingface_hub
#   f16feb8 verified
# Docker Compose stack with two services built from the same image:
# a vLLM server for the model mounted at /model, and a container that
# runs a benchmark script.
services:
  # vLLM server. Container port 8000 is published on host port 8001.
  vllm-iquest-optimized:
    image: vllm-blackwell-official:latest
    container_name: vllm-iquest-nvfp4-hq
    environment:
      - VLLM_USE_V1=1
      - NVIDIA_VISIBLE_DEVICES=all
      - VLLM_LOGGING_LEVEL=DEBUG
    volumes:
      # Relative host path; the model directory appears as /model inside
      # the container and is what --model below points at.
      - ../NVFP4-Packed:/model
    ports:
      - "8001:8000"
    # Only flags are given here, so they are passed as arguments to the
    # image's entrypoint (presumably the vLLM API server -- confirm against
    # the image). The folded scalar joins the lines with single spaces.
    command: >-
      --model /model
      --served-model-name iquest-coder-40b-loop
      --quantization modelopt
      --trust-remote-code
      --tensor-parallel-size 1
      --gpu-memory-utilization 0.8
      --max-model-len 32768
      --enforce-eager
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Runs /scripts/benchmark_optimized.py with python3 in the same image.
  benchmarking:
    image: vllm-blackwell-official:latest
    container_name: iquest-benchmark
    volumes:
      - ../NVFP4-Packed:/model
      # The current directory is mounted at /scripts so the benchmark
      # script is visible inside the container.
      - .:/scripts
    # Replace the image's default entrypoint with the Python interpreter;
    # `command` is then the script path handed to it.
    entrypoint: python3
    command: /scripts/benchmark_optimized.py
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]