config.rope_parameters["rope_theta"] = rope_theta #7
by Daemontatox - opened
```
config.rope_parameters["rope_theta"] = rope_theta
(APIServer pid=3453776) ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
(APIServer pid=3453776) TypeError: 'NoneType' object does not support item assignment
```
I am using vLLM 0.12.0.
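The root cause is visible in the traceback: for this model the loaded config carries `rope_parameters = None`, so vLLM's item assignment on it fails. A minimal sketch of the failure mode (the `Cfg` class below is just a stand-in for the real transformers config object):

```python
# Minimal illustration of the failure mode; Cfg stands in for the
# transformers config object that vLLM loads for this model.
class Cfg:
    rope_parameters = None  # what the loaded config apparently carries

cfg = Cfg()
try:
    # Effectively what vLLM's rope patching attempts:
    cfg.rope_parameters["rope_theta"] = 10000.0
except TypeError as e:
    print(e)  # 'NoneType' object does not support item assignment
```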
For anyone having issues deploying this in vLLM: you can run the Python script below and adapt the deploy command to your needs until it's fixed in vLLM 0.13.0.
```python
# launch_vllm.py
import sys
import multiprocessing

# vLLM spawns worker processes; forcing 'fork' keeps the monkey-patch below
# visible in the children (with 'spawn' they would re-import the unpatched module).
multiprocessing.set_start_method('fork', force=True)

from vllm.transformers_utils import config as vllm_config

_original_patch = vllm_config.patch_rope_parameters

def safe_patch_rope_parameters(config):
    # Guard against configs where rope_parameters is missing or None, which
    # makes the stock patch fail with
    # TypeError: 'NoneType' object does not support item assignment.
    if not hasattr(config, 'rope_parameters') or config.rope_parameters is None:
        config.rope_parameters = {'rope_type': 'default'}
    elif isinstance(config.rope_parameters, dict) and 'rope_type' not in config.rope_parameters:
        config.rope_parameters['rope_type'] = 'default'
    return _original_patch(config)

# Install the guard before the CLI entrypoint imports anything that calls it.
vllm_config.patch_rope_parameters = safe_patch_rope_parameters

if __name__ == '__main__':
    from vllm.entrypoints.cli.main import main
    sys.exit(main())
```
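With the patch installed before vLLM's CLI runs, launch the server through the wrapper instead of `vllm serve`. The flags below are my setup (small model, 20% of one GPU, 2k context); adjust them to yours: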
```bash
CUDA_VISIBLE_DEVICES=2 \
nohup python launch_vllm.py serve unsloth/functiongemma-270m-it \
    --port 8008 \
    --host 0.0.0.0 \
    --gpu-memory-utilization 0.2 \
    --enable-auto-tool-choice \
    --tool-call-parser hermes \
    --enable-chunked-prefill \
    --max-model-len 2048 \
    --max-num-batched-tokens 2048 \
    --enable-prefix-caching \
    --tensor-parallel-size 1 \
    --max-num-seqs 5 \
    > gemma_server.log 2>&1 &
```
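Once the server is up, a quick way to sanity-check it is to hit vLLM's OpenAI-compatible endpoint. A minimal sketch using `requests`, assuming the launch command above (the prompt is just a placeholder):

```python
# Smoke test against the OpenAI-compatible API the server exposes.
# Assumes the server above is running on localhost:8008.
import requests

resp = requests.post(
    "http://localhost:8008/v1/chat/completions",
    json={
        "model": "unsloth/functiongemma-270m-it",
        "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
        "max_tokens": 128,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"])
```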
Daemontatox changed discussion status to closed