Initial commit: Productionized Swarm with Docker support
This commit is contained in:
29
swarm-control/start-persona.sh
Executable file
29
swarm-control/start-persona.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# Node Matali: Gemma-4-26B-A4B-it
# GPU Mapping: 0, 7
#
# Starts the SGLang server for this node: sets the cache locations, activates
# the "swarm" conda environment, pins the GPUs, then replaces this shell with
# the server process listening on port 3000.
#
# Strict mode (set -euo pipefail) is intentionally not enabled: the sourced
# conda hooks are outside this script's control and may not tolerate it.
# The steps that matter are checked explicitly instead.

# Print a message to stderr and abort with a non-zero status.
die() {
  printf 'start-persona: %s\n' "$*" >&2
  exit 1
}

# 1. Point to the BIG drive
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

# 2. Source the environment
readonly CONDA_INIT=/home/isnai/anaconda3/etc/profile.d/conda.sh
[[ -r "$CONDA_INIT" ]] || die "conda init script not readable: $CONDA_INIT"
# shellcheck disable=SC1090  # path is machine-specific, not available to the linter
source "$CONDA_INIT" || die "failed to source $CONDA_INIT"
# Fail here rather than silently launching with whatever python3 is on PATH.
conda activate swarm || die "could not activate conda environment 'swarm'"

# Two devices are exposed to the process; this must agree with --tp 2 below.
export CUDA_VISIBLE_DEVICES=0,7
# 0 keeps NCCL peer-to-peer transport enabled (i.e. it is NOT disabled).
export NCCL_P2P_DISABLE=0

# 3. Launch
# Arguments live in an array so that no line-continuation backslash can glue
# two options together (e.g. "128\" followed by "--chunked-prefill-size").
# NOTE(review): --host 0.0.0.0 listens on all interfaces and
# --trust-remote-code lets the model repository run its own code; confirm
# that port 3000 is not reachable from untrusted networks.
server_args=(
  --model-path google/gemma-4-26b-a4b-it
  --tp 2
  --port 3000
  --host 0.0.0.0
  --attention-backend triton
  --mem-fraction-static 0.8
  --max-running-requests 128
  --chunked-prefill-size 4096
  --context-length 32768
  --trust-remote-code
  --enable-piecewise-cuda-graph
  --schedule-policy lpm
)

# exec so the server takes over this PID and receives signals (e.g. SIGTERM
# from a supervisor or container runtime) directly instead of the wrapper shell.
exec python3 -m sglang.launch_server "${server_args[@]}"
|
||||
Reference in New Issue
Block a user