Initial commit: Productionized Swarm with Docker support

This commit is contained in:
damith
2026-04-16 16:46:24 +10:00
commit c2e2e52ff3
39 changed files with 331 additions and 0 deletions

92
docker-compose.yml Normal file
View File

@@ -0,0 +1,92 @@
services:
# --- Persona (Gemma-4-26B-A4B-it) ---
# SGLang server for the persona model, tensor-parallel (--tp 2) over two GPUs.
# Physical: 0 & 7 | Container: 0 & 1
  persona:
    build: ./swarm-control/persona
    ipc: host  # Replaces shm_size to avoid shared memory bottlenecks
    ulimits:
      memlock:
        soft: -1
        hard: -1
    environment:
      - CUDA_VISIBLE_DEVICES=0,1  # Corrected for container re-indexing
      - NCCL_P2P_DISABLE=0
      # SECURITY: never commit the Hugging Face token. It is read from the
      # shell environment or an untracked .env file next to this compose file;
      # `:?` makes `docker compose` abort with this message when it is unset.
      # The token that was previously hard-coded here must be revoked.
      - "HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:?set HUGGING_FACE_HUB_TOKEN in the environment or .env}"
    volumes:
      - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
      - /mnt/nvme3n1/swarm/sglang_cache:/root/.cache/sglang
    ports:
      - "3000:3000"
    # Folded scalar: the lines below are joined with spaces into one command.
    command: >
      python3 -m sglang.launch_server
      --model-path google/gemma-4-26b-a4b-it
      --tp 2
      --port 3000
      --host 0.0.0.0
      --attention-backend triton
      --mem-fraction-static 0.8
      --max-running-requests 128
      --chunked-prefill-size 4096
      --context-length 32768
      --trust-remote-code
      --enable-piecewise-cuda-graph
      --schedule-policy lpm
    deploy:
      resources:
        reservations:
          devices:
            # Host GPUs 0 and 7 are exposed to the container.
            - driver: nvidia
              device_ids: ['0', '7']
              capabilities: [gpu]
# --- TTS ---
# Service built from ./swarm-control/indra-tts-server and tagged swarm-tts.
# Physical: 7 | Container: 0
  tts:
    build: ./swarm-control/indra-tts-server
    image: swarm-tts
    depends_on:
      - persona
    environment:
      # Single reserved GPU, so it appears as index 0 inside the container.
      CUDA_VISIBLE_DEVICES: "0"
      PYTHONPATH: "/app:/app/Qwen3-TTS"
      NVIDIA_DRIVER_CAPABILITIES: "all"
    volumes:
      # Voice samples are mounted read-only at the same path as on the host.
      - /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
      - /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
    ports:
      - "8002:8002"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids:
                - "7"
              capabilities:
                - gpu
# --- STT ---
# Speaches container (CUDA image) published on host port 8005.
  stt:
    image: ghcr.io/speaches-ai/speaches:latest-cuda
    depends_on:
      - persona
    # Explicitly match your host user UID
    user: "1000:1000"
    environment:
      CUDA_VISIBLE_DEVICES: "0"
      # Use the full HF ID. Speaches will auto-download this to your cache
      # on first boot.
      PRELOAD_MODELS: "deepdml/faster-whisper-large-v3-turbo-ct2"
    volumes:
      # Map to the base cache folder
      - /mnt/nvme3n1/swarm/huggingface_cache:/home/ubuntu/.cache/huggingface
    ports:
      # Host 8005 -> container 8000
      - "8005:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids:
                - "0"
              capabilities:
                - gpu
# Give the project's default network a fixed name instead of the
# auto-generated "<project>_default".
networks:
  default:
    name: swarm-network