config.rope_parameters["rope_theta"] = rope_theta

#7
by Daemontatox - opened
    config.rope_parameters["rope_theta"] = rope_theta
(APIServer pid=3453776)     ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
(APIServer pid=3453776) TypeError: 'NoneType' object does not support item assignment
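For context, the crash is just an item assignment on a None attribute. A minimal stand-in reproduces the same TypeError (DummyConfig here is hypothetical, not the real HF config class, and the assignment only approximates what vLLM does):

# repro_sketch.py - illustrates the failure mode, not the actual vLLM code path
class DummyConfig:
    # stand-in for a model config whose rope_parameters field is missing/None
    rope_parameters = None

config = DummyConfig()
# roughly what vLLM 0.12.0 attempts while patching rope settings:
config.rope_parameters["rope_theta"] = 10000.0
# -> TypeError: 'NoneType' object does not support item assignment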

I am using vLLM 0.12.0.
For anyone having issues deploying this in vLLM, you can run the Python script below and adapt the deploy command to your needs until it's fixed in vLLM 0.13.0.

# launch_vllm.py
# Wraps vLLM's rope-parameter patching so a config with rope_parameters=None
# no longer crashes, then hands control to the normal vLLM CLI.
import sys
import multiprocessing
multiprocessing.set_start_method('fork', force=True)

from vllm.transformers_utils import config as vllm_config

_original_patch = vllm_config.patch_rope_parameters

def safe_patch_rope_parameters(config):
    # Make sure rope_parameters exists and carries a rope_type before vLLM touches it.
    if not hasattr(config, 'rope_parameters') or config.rope_parameters is None:
        config.rope_parameters = {'rope_type': 'default'}
    elif isinstance(config.rope_parameters, dict) and 'rope_type' not in config.rope_parameters:
        config.rope_parameters['rope_type'] = 'default'
    return _original_patch(config)

vllm_config.patch_rope_parameters = safe_patch_rope_parameters

if __name__ == '__main__':
    # Delegate to the vLLM CLI so `python launch_vllm.py serve ...` behaves like `vllm serve ...`.
    from vllm.entrypoints.cli.main import main
    sys.exit(main())
CUDA_VISIBLE_DEVICES=2 \
nohup python launch_vllm.py serve unsloth/functiongemma-270m-it \
  --port 8008 \
  --host 0.0.0.0 \
  --gpu-memory-utilization 0.2 \
  --enable-auto-tool-choice \
  --tool-call-parser hermes \
  --enable-chunked-prefill \
  --max-model-len 2048 \
  --max-num-batched-tokens 2048 \
  --enable-prefix-caching \
  --tensor-parallel-size 1 \
  --max-num-seqs 5 \
  > gemma_server.log 2>&1 &
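Once the server is up, you can smoke-test the OpenAI-compatible endpoint it exposes. This sketch assumes the openai Python client is installed; get_weather is just a made-up example tool, and the port/model name match the launch command above:

# test_server.py - quick check that the server answers and tool-calling works
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8008/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical example tool
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

resp = client.chat.completions.create(
    model="unsloth/functiongemma-270m-it",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)
print(resp.choices[0].message)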
Daemontatox changed discussion status to closed
