ubergarm committed on
Commit
32f2b1c
·
1 Parent(s): f956ece

massage command

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -49,7 +49,7 @@ Final estimate: PPL = 5.4403 +/- 0.03421 (wiki.test.raw, compare to Q8_0 at 5.31
49
  # This example for 24GB VRAM + 96 GB RAM + 16 physical core CPU
50
  # Offload first ffn layers 0-9 on GPU VRAM.
51
  # Leave remaining ffn layers on CPU RAM.
52
- ./build/bin/llama-server
53
  --model ubergarm/Qwen3-235B-A22B-GGUF/Qwen3-235B-A22B-mix-IQ3_K-00001-of-00003.gguf \
54
  --alias ubergarm/Qwen3-235B-A22B-mix-IQ3_K \
55
  -fa -fmoe \
@@ -59,7 +59,7 @@ Final estimate: PPL = 5.4403 +/- 0.03421 (wiki.test.raw, compare to Q8_0 at 5.31
59
  -ot blk\.[0-9]\.ffn.*=CUDA0 \
60
  -ot "blk.*\.ffn.*=CPU" \
61
  -ngl 99 \
62
- --threads 16
63
  -ub 4096 -b 4096 \
64
  --host 127.0.0.1 \
65
  --port 8080
 
49
  # This example for 24GB VRAM + 96 GB RAM + 16 physical core CPU
50
  # Offload first ffn layers 0-9 on GPU VRAM.
51
  # Leave remaining ffn layers on CPU RAM.
52
+ ./build/bin/llama-server \
53
  --model ubergarm/Qwen3-235B-A22B-GGUF/Qwen3-235B-A22B-mix-IQ3_K-00001-of-00003.gguf \
54
  --alias ubergarm/Qwen3-235B-A22B-mix-IQ3_K \
55
  -fa -fmoe \
 
59
  -ot blk\.[0-9]\.ffn.*=CUDA0 \
60
  -ot "blk.*\.ffn.*=CPU" \
61
  -ngl 99 \
62
+ --threads 16 \
63
  -ub 4096 -b 4096 \
64
  --host 127.0.0.1 \
65
  --port 8080