Elias-Schwegler
/

IQuest-Coder-V1-40B-Loop-Instruct-NVFP4

Text Generation

iquestloopcoder

custom-architecture

Model card Files Files and versions

IQuest-Coder-V1-40B-Loop-Instruct-NVFP4 / deploy /docker-compose.yaml

Elias-Schwegler's picture

Elias-Schwegler

Upload folder using huggingface_hub

f16feb8 verified about 1 month ago

history blame contribute delete

1.06 kB

	services:
	vllm-iquest-optimized:
	image: vllm-blackwell-official:latest
	container_name: vllm-iquest-nvfp4-hq
	environment:
	- VLLM_USE_V1=1
	- NVIDIA_VISIBLE_DEVICES=all
	- VLLM_LOGGING_LEVEL=DEBUG
	volumes:
	- ../NVFP4-Packed:/model
	ports:
	- "8001:8000"
	command: >
	--model /model --served-model-name iquest-coder-40b-loop --quantization modelopt --trust-remote-code --tensor-parallel-size 1 --gpu-memory-utilization 0.8 --max-model-len 32768 --enforce-eager
	deploy:
	resources:
	reservations:
	devices:
	- driver: nvidia
	count: 1
	capabilities: [ gpu ]

	benchmarking:
	image: vllm-blackwell-official:latest
	container_name: iquest-benchmark
	volumes:
	- ../NVFP4-Packed:/model
	- .:/scripts
	entrypoint: python3
	command: /scripts/benchmark_optimized.py
	deploy:
	resources:
	reservations:
	devices:
	- driver: nvidia
	count: 1
	capabilities: [ gpu ]