#!/bin/bash
# Node Matali: Gemma-4-26B-A4B-it
# GPU Mapping: 0, 7
#
# Starts an SGLang server (python3 -m sglang.launch_server) for the model
# google/gemma-4-26b-a4b-it on port 3000, bound to 0.0.0.0.
#
# Steps:
#   1. Redirect the Hugging Face and SGLang caches to the large NVMe drive.
#   2. Activate the "swarm" conda environment and select the GPUs.
#   3. Launch the server in the foreground.
#
# Exits with status 1 if the conda environment cannot be set up; otherwise
# the exit status is that of the server process.

# 1. Point to the BIG drive
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

# 2. Source the environment
# Fail fast here: continuing without the conda environment would launch
# whatever python3 happens to be first on PATH.
conda_profile=/home/isnai/anaconda3/etc/profile.d/conda.sh
# shellcheck source=/dev/null
if ! source "$conda_profile"; then
  printf 'error: cannot source %s\n' "$conda_profile" >&2
  exit 1
fi
if ! conda activate swarm; then
  printf 'error: cannot activate conda environment "swarm"\n' >&2
  exit 1
fi

# Expose only GPUs 0 and 7 to the server; the count matches --tp 2 below.
export CUDA_VISIBLE_DEVICES=0,7
# NOTE(review): 0 presumably leaves NCCL peer-to-peer transport enabled
# (the variable is a "disable" switch) -- confirm against the NCCL docs.
export NCCL_P2P_DISABLE=0

# 3. Launch
# The flags are kept in an array rather than joined with backslash
# continuations, so a missing space before a trailing "\" can never fuse a
# value with the following flag (e.g. "128--chunked-prefill-size").
server_args=(
  --model-path google/gemma-4-26b-a4b-it
  --tp 2
  --port 3000
  --host 0.0.0.0
  --attention-backend triton
  --mem-fraction-static 0.8
  --max-running-requests 128
  --chunked-prefill-size 4096
  --context-length 32768
  --trust-remote-code
  --enable-piecewise-cuda-graph
  --schedule-policy lpm
)

python3 -m sglang.launch_server "${server_args[@]}"