#!/bin/bash
#
# Node Matali: Gemma-4-26B-A4B-it
# GPU Mapping: 0, 7
#
# Launches an SGLang server for google/gemma-4-26b-a4b-it, listening on
# 0.0.0.0:3000, inside the "swarm" conda environment.
#
# Usage: run directly (do not source it: the final `exec` replaces the
# calling shell with the server process).

# Print an error message to stderr and abort the launch.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# 1. Point to the BIG drive
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

# 2. Source the environment
# Each step is checked explicitly so a broken conda setup aborts the launch
# instead of silently starting the server with whatever python3 is on PATH.
# `set -u` is deliberately not enabled: conda activation hooks may reference
# unset variables.
conda_profile=/home/isnai/anaconda3/etc/profile.d/conda.sh
[[ -r "$conda_profile" ]] || die "conda profile script not readable: $conda_profile"
# shellcheck source=/dev/null
source "$conda_profile" || die "failed to source $conda_profile"
conda activate swarm || die "failed to activate conda environment 'swarm'"

# Expose only GPUs 0 and 7 to the server; two devices, matching --tp 2 below.
export CUDA_VISIBLE_DEVICES=0,7
# 0 leaves NCCL peer-to-peer transport enabled (1 would disable it).
export NCCL_P2P_DISABLE=0

# 3. Launch
# Flags are kept in an array rather than backslash-continued lines: a
# continuation written as `128\` (no space before the backslash) glues the
# next flag onto the value unless the following line is indented.
server_args=(
  --model-path google/gemma-4-26b-a4b-it
  --tp 2
  --port 3000
  --host 0.0.0.0
  --attention-backend triton
  --mem-fraction-static 0.8
  --max-running-requests 128
  --chunked-prefill-size 4096
  --context-length 32768
  --trust-remote-code
  --enable-piecewise-cuda-graph
  --schedule-policy lpm
)

# exec replaces this shell with the server, so signals sent to the launcher's
# PID (e.g. by a supervisor) are delivered to the server itself.
exec python3 -m sglang.launch_server "${server_args[@]}"