Files
Swarm/swarm-control/start-persona.sh

30 lines
756 B
Bash
Executable File

#!/bin/bash
# Launches the SGLang server (python3 -m sglang.launch_server) for this persona.
#
# Node Matali: Gemma-4-26B-A4B-it
# GPU Mapping: 0, 7
#
# NOTE(review): `set -eu` is intentionally not enabled here; conda activation
# hooks are not guaranteed to be strict-mode clean, so the critical steps are
# checked explicitly instead.

# Print an error to stderr and abort with a non-zero status.
die() {
  printf 'start-persona: %s\n' "$*" >&2
  exit 1
}

# 1. Point to the BIG drive: model and SGLang caches live under /mnt/nvme3n1.
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

# 2. Source the environment. Fail loudly rather than falling through to
#    whatever python3 happens to be on PATH when conda is unavailable.
conda_init=/home/isnai/anaconda3/etc/profile.d/conda.sh
[[ -r "$conda_init" ]] || die "conda init script not readable: $conda_init"
# shellcheck source=/dev/null
source "$conda_init" || die "failed to source $conda_init"
conda activate swarm || die "failed to activate conda environment 'swarm'"

# Two devices are exposed, matching `--tp 2` below.
export CUDA_VISIBLE_DEVICES=0,7
# 0 leaves NCCL peer-to-peer transport enabled (presumably the NCCL default;
# kept explicit so it is easy to flip when debugging — confirm with NCCL docs).
export NCCL_P2P_DISABLE=0

# 3. Launch.
# The arguments are held in an array instead of backslash-continued lines:
# a continuation written as `128\` (no space before the backslash) glues the
# next line's flag onto the value, so the server would have received
# `128--chunked-prefill-size` and `4096--context-length` as single words.
server_args=(
  --model-path google/gemma-4-26b-a4b-it
  --tp 2
  --port 3000
  --host 0.0.0.0
  --attention-backend triton
  --mem-fraction-static 0.8
  --max-running-requests 128
  --chunked-prefill-size 4096
  --context-length 32768
  --trust-remote-code
  --enable-piecewise-cuda-graph
  --schedule-policy lpm
)

python3 -m sglang.launch_server "${server_args[@]}"