Initial commit: Productionized Swarm with Docker support

2026-04-16 16:46:24 +10:00
commit ea4c11e32f
39 changed files with 331 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,19 @@
+# Model Caches (Massive)
+huggingface_cache/
+sglang_cache/
+models/
+checkpoints/
+
+# Local Python & Environment
+__pycache__/
+*.pyc
+.venv/
+.env
+
+# Large binary data (top-level samples only; clips under voice-samples/scifi/ are tracked)
+voice-samples/*.wav
+voice-samples/*.mp3
+tts_test.wav
+
+# Local source copies (since we COPY them in Docker)
+local-sglang/
--- a/1
+++ b/1
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,92 @@
+services:
+  # --- Persona (Gemma-4-26B-A4B-it) ---
+  # Physical GPUs 0 & 7 (device_ids below) are re-indexed as 0 & 1 inside the container
+  persona:
+    build: ./swarm-control/persona
+    ipc: host # Replaces shm_size to avoid shared memory bottlenecks
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    environment:
+      - CUDA_VISIBLE_DEVICES=0,1 # Corrected for container re-indexing
+      - NCCL_P2P_DISABLE=0
+      - "HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:?set it in .env}" # secret: never commit; revoke the old one
+    volumes:
+      - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
+      - /mnt/nvme3n1/swarm/sglang_cache:/root/.cache/sglang
+    ports:
+      - "3000:3000"
+    command: >
+      python3 -m sglang.launch_server
+      --model-path google/gemma-4-26b-a4b-it
+      --tp 2
+      --port 3000
+      --host 0.0.0.0
+      --attention-backend triton
+      --mem-fraction-static 0.8
+      --max-running-requests 128
+      --chunked-prefill-size 4096
+      --context-length 32768
+      --trust-remote-code
+      --enable-piecewise-cuda-graph
+      --schedule-policy lpm
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0', '7']
+              capabilities: [gpu]
+
+  # --- TTS ---
+  # Physical: 7 | Container: 0
+  tts:
+    build: ./swarm-control/indra-tts-server
+    image: swarm-tts
+    depends_on:
+      - persona
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      - PYTHONPATH=/app:/app/Qwen3-TTS
+      - NVIDIA_DRIVER_CAPABILITIES=all
+    volumes:
+      - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface # shared HF cache (assumes hub download) so weights persist
+      - /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
+      - /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
+    ports:
+      - "8002:8002"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['7']
+              capabilities: [gpu]
+
+  # --- STT (Speaches) ---
+  stt:
+    image: ghcr.io/speaches-ai/speaches:latest-cuda
+    depends_on:
+      - persona
+    user: "1000:1000"  # run as the host user's UID:GID
+    environment:
+      CUDA_VISIBLE_DEVICES: "0"
+      # Full HF model ID; per the original note, Speaches auto-downloads it into the cache on first boot.
+      PRELOAD_MODELS: deepdml/faster-whisper-large-v3-turbo-ct2
+    volumes:
+      # Shared host HF cache (the same host directory the persona service mounts)
+      - /mnt/nvme3n1/swarm/huggingface_cache:/home/ubuntu/.cache/huggingface
+    ports:
+      - "8005:8000"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]
+              capabilities: ["gpu"]
+
+networks:
+  default:  # the implicit network every service above joins
+    name: swarm-network  # fixed name instead of Compose's project-prefixed default
--- a/swarm-control/indra-tts-server/Dockerfile
+++ b/swarm-control/indra-tts-server/Dockerfile
@@ -0,0 +1,37 @@
+FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
+
+# Keep apt from prompting during the build (note: set via ENV, so it also persists into the runtime image)
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# 1. Add the deadsnakes PPA, then install Python 3.12 plus curl/git and the audio libraries (libsndfile, ffmpeg, SoX)
+RUN apt-get update && apt-get install -y software-properties-common && \
+    add-apt-repository ppa:deadsnakes/ppa -y && \
+    apt-get update && apt-get install -y \
+    python3.12 \
+    python3.12-dev \
+    curl \
+    git \
+    libsndfile1 \
+    ffmpeg \
+    sox \
+    libsox-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# 2. Bootstrap pip for python3.12 with get-pip.py (the apt step above installs no pip package)
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
+
+WORKDIR /app
+
+# 3. Install torch (pinned to 2.6.0) and torchaudio (unpinned) from the cu124 wheel index, then the server deps
+RUN python3.12 -m pip install --no-cache-dir torch==2.6.0 torchaudio --index-url https://download.pytorch.org/whl/cu124
+RUN python3.12 -m pip install --no-cache-dir fastapi uvicorn numpy soundfile
+
+# 4. Install faster-qwen3-tts from the default pip index (tts-server.py imports faster_qwen3_tts)
+RUN python3.12 -m pip install --no-cache-dir faster-qwen3-tts
+
+COPY tts-server.py .
+
+EXPOSE 8002
+CMD ["python3.12", "tts-server.py"]
--- a/swarm-control/indra-tts-server/tts-server.py
+++ b/swarm-control/indra-tts-server/tts-server.py
@@ -0,0 +1,80 @@
+import os
+import torch
+import numpy as np
+import io
+import wave
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import Response
+from pydantic import BaseModel
+from faster_qwen3_tts import FasterQwen3TTS
+
+app = FastAPI(title="Indra tts")
+
+if not torch.cuda.is_available():  # fail at import time rather than on the first request
+    raise RuntimeError("Mouth cannot find CUDA. Check nvidia-container-toolkit.")
+print(f"Loading model on: {torch.cuda.get_device_name(0)}")
+
+# Load the Base model once at import; generate_speech reuses this module-level instance.
+model = FasterQwen3TTS.from_pretrained(
+    "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
+    device="cuda:0", # first visible GPU; the launchers expose physical GPU 7 as index 0
+    dtype=torch.bfloat16
+)
+
+class TTSRequest(BaseModel):  # request body of POST /v1/audio/speech
+    model: str = "tts-1" # ignored by backend, here to satisfy modelix router
+    input: str  # text to synthesize
+    voice: str = "oni"  # stem of the reference clip: <voice>.wav (plus optional <voice>.txt transcript)
+    response_format: str = "wav"  # NOTE(review): never read by the handler; the response is always WAV
+    seed: int = 42  # passed to torch.manual_seed before generation
+
+@app.post("/v1/audio/speech")
+async def generate_speech(request: TTSRequest):
+    try:
+        voice_file = f"{request.voice}.wav"
+        base_path = "/mnt/nvme3n1/swarm/voice-samples"
+        ref_path = os.path.join(base_path, voice_file)
+        txt_path = os.path.splitext(ref_path)[0] + ".txt"
+
+        ref_text = None
+        if os.path.exists(txt_path):
+            with open(txt_path, "r") as f:
+                ref_text = f.read().strip()
+
+        # Fix the seed for the persona identity
+        torch.manual_seed(request.seed)
+
+        full_audio = []
+        # Non-streaming call is fine here since it takes <1s on your L40S
+        audio_data, sample_rate = model.generate_voice_clone(
+            text=request.input,
+            language="English",
+            ref_audio=ref_path,
+            ref_text=ref_text,
+            xvec_only=(ref_text is None)
+        )
+
+        audio_data = np.array(audio_data)
+
+        audio_data = audio_data.flatten()
+
+        # Convert Float32 to Int16 for standard WAV compatibility
+        audio_int16 = (audio_data * 32767).astype(np.int16)
+
+        wav_io = io.BytesIO()
+        with wave.open(wav_io, 'wb') as wav_file:
+            wav_file.setnchannels(1)
+            wav_file.setsampwidth(2)
+            wav_file.setframerate(sample_rate)
+            wav_file.writeframes(audio_int16.tobytes())
+
+        wav_io.seek(0)
+        return Response(content=wav_io.getvalue(), media_type="audio/wav")
+
+    except Exception as e:
+        print(f"Indra Mouth Error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":  # direct launch: `python tts-server.py`, as the Dockerfile CMD and start-tts-qwen.sh do
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8002)  # all interfaces; 8002 matches the compose port mapping
--- a/swarm-control/persona/Dockerfile
+++ b/swarm-control/persona/Dockerfile
@@ -0,0 +1,12 @@
+FROM lmsysorg/sglang:latest
+
+# 1. Install transformers from the GitHub default branch (unpinned); --no-deps skips installing its dependencies
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir --upgrade --no-deps \
+    git+https://github.com/huggingface/transformers.git \
+    --break-system-packages
+
+# 2. Overlay the local SGLang Python sources (local-sglang/ is git-ignored, so it must exist in the build context)
+COPY local-sglang/python /sgl-workspace/sglang/python
+
+WORKDIR /app
--- a/swarm-control/start-persona.sh
+++ b/swarm-control/start-persona.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Node Matali: Gemma-4-26B-A4B-it
+# GPU Mapping: 0, 7
+
+# 1. Point to the BIG drive
+export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
+export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache
+
+# 2. Source the environment
+source /home/isnai/anaconda3/etc/profile.d/conda.sh
+conda activate swarm
+
+export CUDA_VISIBLE_DEVICES=0,7
+export NCCL_P2P_DISABLE=0 
+
+# 3. Launch
+python3 -m sglang.launch_server \
+    --model-path google/gemma-4-26b-a4b-it \
+    --tp 2 \
+    --port 3000 \
+    --host 0.0.0.0 \
+    --attention-backend triton \
+    --mem-fraction-static 0.8 \
+    --max-running-requests 128\
+    --chunked-prefill-size 4096\
+    --context-length 32768 \
+    --trust-remote-code \
+    --enable-piecewise-cuda-graph \
+    --schedule-policy lpm
--- a/swarm-control/start-tts-qwen.sh
+++ b/swarm-control/start-tts-qwen.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# --- Resident Swarm Mouth (Qwen3-TTS 1.7B) ---
+# GPU Mapping: Shared with Node Matali on GPU 7
+
+export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
+source /home/isnai/anaconda3/etc/profile.d/conda.sh
+conda activate swarm-voice
+
+# Explicitly lock to GPU 7
+export CUDA_VISIBLE_DEVICES=7
+export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}/mnt/nvme3n1/swarm/Qwen3-TTS" # no empty entry when PYTHONPATH is unset
+
+echo "--- Launching Resident Swarm Mouth (Port 8002) ---"
+
+# Move to the server directory; abort if it is missing rather than launching from the wrong place
+cd /mnt/nvme3n1/swarm/swarm-control/indra-tts-server || exit 1
+
+# Launching our Turbo-Mouth server
+# Because CUDA_VISIBLE_DEVICES=7, the server will see GPU 7 as 'cuda:0'
+python tts-server.py
--- a/swarm-control/start-whisper-stt.sh
+++ b/swarm-control/start-whisper-stt.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# --- Environment Setup ---
+export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
+export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache
+
+source /home/isnai/anaconda3/etc/profile.d/conda.sh
+conda activate swarm-voice
+
+export CUDA_VISIBLE_DEVICES=0
+
+echo "--- Launching Resident Swarm Ears (Port 8005) ---"
+
+# 2026 Positional Argument Syntax
+faster-whisper-server \
+    --host 0.0.0.0 \
+    --port 8005 \
+    whisper-v4-turbo
--- a/voice-samples/aus-female-1.txt
+++ b/voice-samples/aus-female-1.txt
@@ -0,0 +1 @@
+I'll see about doing that tonight then, since i managed to get all my work commitments out of the way.
--- a/voice-samples/aus-female-2.txt
+++ b/voice-samples/aus-female-2.txt
@@ -0,0 +1 @@
+The three benefiting African countries also contributed to the financing. 
--- a/voice-samples/aus-female-3.txt
+++ b/voice-samples/aus-female-3.txt
@@ -0,0 +1 @@
+The town also has the only covered shopping centre in himerland: hadson boutique centre.
--- a/voice-samples/aus-female-4.txt
+++ b/voice-samples/aus-female-4.txt
@@ -0,0 +1 @@
+Currently there is only one real English user in the world, somewhere in Liechtenstein.
--- a/voice-samples/aus-female-5.txt
+++ b/voice-samples/aus-female-5.txt
@@ -0,0 +1 @@
+If i could spend a day learning a new hobby, it wouldn't so much be a new hobby. It would be extending a hobby that I have already.
--- a/voice-samples/aus-female-6.txt
+++ b/voice-samples/aus-female-6.txt
@@ -0,0 +1 @@
+I really enjoy swimming. I love peace and quiet it gives me. I love the feeling of sun on my back in summer, and being in the cool water.
--- a/voice-samples/aus-male-1.txt
+++ b/voice-samples/aus-male-1.txt
@@ -0,0 +1 @@
+She was jealous of the girl with the polish on her nails and the handsome guy at her side.
--- a/voice-samples/aus-male-2.txt
+++ b/voice-samples/aus-male-2.txt
@@ -0,0 +1 @@
+I was wondering if you could tell me a bit more about what its like to live and work there? 
--- a/voice-samples/aus-male-3.txt
+++ b/voice-samples/aus-male-3.txt
@@ -0,0 +1 @@
+Absolutely despicable that gingerbread men are forced to live in houses made of their own flesh.
--- a/voice-samples/aus-male-4.txt
+++ b/voice-samples/aus-male-4.txt
@@ -0,0 +1 @@
+It is a very popular dance at wedding banquets and other parties.
--- a/voice-samples/aus-male-5.txt
+++ b/voice-samples/aus-male-5.txt
@@ -0,0 +1 @@
+The area beneath these floating mats is exceptionally rich in aquatic life-forms.
--- a/voice-samples/aus-male-6.txt
+++ b/voice-samples/aus-male-6.txt
@@ -0,0 +1 @@
+Uh the clouds where we live are grey... of course. 
--- a/voice-samples/aus-male-7.txt
+++ b/voice-samples/aus-male-7.txt
@@ -0,0 +1 @@
+The weathers definitely gotten hotter over the last ten years. 
--- a/voice-samples/charter.txt
+++ b/voice-samples/charter.txt
@@ -0,0 +1 @@
+Arachne has been watching you. My patrons and I, we watch all deadliest runners.
--- a/voice-samples/gaius.txt
+++ b/voice-samples/gaius.txt
@@ -0,0 +1 @@
+Our mission, my principal directive, is to ensure humanity's survival, by meeting its most basic needs.
--- a/voice-samples/nona.txt
+++ b/voice-samples/nona.txt
@@ -0,0 +1 @@
+I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me.
--- a/voice-samples/oni.txt
+++ b/voice-samples/oni.txt
@@ -0,0 +1 @@
+Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend.
--- a/voice-samples/scifi/marathon/_gantry.txt
+++ b/voice-samples/scifi/marathon/_gantry.txt
@@ -0,0 +1 @@
+We see you. Do you see? Will you join, or sit idly? Here's the pitch, MIDA is revolution. MIDA...
--- a/voice-samples/scifi/marathon/_gantry.wav
+++ b/voice-samples/scifi/marathon/_gantry.wav
--- a/voice-samples/scifi/marathon/charter.txt
+++ b/voice-samples/scifi/marathon/charter.txt
@@ -0,0 +1 @@
+Arachne has been watching you. My patrons and I, we watch all deadliest runners.
--- a/voice-samples/scifi/marathon/charter.wav
+++ b/voice-samples/scifi/marathon/charter.wav
--- a/voice-samples/scifi/marathon/gaius.txt
+++ b/voice-samples/scifi/marathon/gaius.txt
@@ -0,0 +1 @@
+Our mission, my principal directive, is to ensure humanity's survival, by meeting its most basic needs.
--- a/voice-samples/scifi/marathon/gaius.wav
+++ b/voice-samples/scifi/marathon/gaius.wav
--- a/voice-samples/scifi/marathon/nona.txt
+++ b/voice-samples/scifi/marathon/nona.txt
@@ -0,0 +1 @@
+I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me.
--- a/voice-samples/scifi/marathon/nona.wav
+++ b/voice-samples/scifi/marathon/nona.wav
--- a/voice-samples/scifi/marathon/oni.txt
+++ b/voice-samples/scifi/marathon/oni.txt
@@ -0,0 +1 @@
+Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend.
--- a/voice-samples/scifi/marathon/oni.wav
+++ b/voice-samples/scifi/marathon/oni.wav
--- a/voice-samples/scifi/marathon/vulcan.txt
+++ b/voice-samples/scifi/marathon/vulcan.txt
@@ -0,0 +1 @@
+We are engaging in a value appraisal of the new cascadia colony site, in anticipation of a larger resource extraction initiative.
--- a/voice-samples/scifi/marathon/vulcan.wav
+++ b/voice-samples/scifi/marathon/vulcan.wav
--- a/voice-samples/vulcan.txt
+++ b/voice-samples/vulcan.txt
@@ -0,0 +1 @@
+We are engaging in a value appraisal of the new cascadia colony site, in anticipation of a larger resource extraction initiative.
				`@@ -0,0 +1 @@`
				`I'll see about doing that tonight then, since i managed to get all my work commitments out of the way.`
				`@@ -0,0 +1 @@`
				`The three benefiting African countries also contributed to the financing.`
				`@@ -0,0 +1 @@`
				`The town also has the only covered shopping centre in himerland: hadson boutique centre.`
				`@@ -0,0 +1 @@`
				`Currently there is only one real English user in the world, somewhere in Lichtenstein.`
				`@@ -0,0 +1 @@`
				`If i could spend a day learning a new hobby, it wouldn't so much be a new hobby. It would be extending a hobby that I have already.`
				`@@ -0,0 +1 @@`
				`I really enjoy swimming. I love peace and quiet it gives me. I love the feeling of sun on my back in summer, and being in the cool water.`
				`@@ -0,0 +1 @@`
				`She was jealous of the girl with the polish on her nails and the handsome guy at her side.`
				`@@ -0,0 +1 @@`
				`I was wondering if you could tell me a bit more about what its like to live and work there?`
				`@@ -0,0 +1 @@`
				`Absolutely despicable that gingerbread men are forced to live in houses made of their own flesh.`
				`@@ -0,0 +1 @@`
				`It is a very popular dance at wedding banquets and other parties.`