commit c2e2e52ff350ad13196cae7714abea83ec0bd246
Author: damith
Date:   Thu Apr 16 16:46:24 2026 +1000

    Initial commit: Productionized Swarm with Docker support

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2d727d8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,19 @@
+# Model Caches (Massive)
+huggingface_cache/
+sglang_cache/
+models/
+checkpoints/
+
+# Local Python & Environment
+__pycache__/
+*.pyc
+.venv/
+.env
+
+# Large Binary Data
+voice-samples/*.wav
+voice-samples/*.mp3
+tts_test.wav
+
+# Local source copies (since we COPY them in Docker)
+local-sglang/
diff --git a/Qwen3-TTS b/Qwen3-TTS
new file mode 160000
index 0000000..022e286
--- /dev/null
+++ b/Qwen3-TTS
@@ -0,0 +1 @@
+Subproject commit 022e286b98fbec7e1e916cb940cdf532cd9f488e
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..24f0054
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,94 @@
+services:
+  # --- Persona (Gemma-4-26B-A4B-it) ---
+  # Physical: 0 & 7 | Container: 0 & 1
+  persona:
+    build: ./swarm-control/persona
+    ipc: host # Replaces shm_size to avoid shared memory bottlenecks
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    environment:
+      - CUDA_VISIBLE_DEVICES=0,1 # Corrected for container re-indexing
+      - NCCL_P2P_DISABLE=0
+      # Secret: read from the shell or the git-ignored .env file, never hard-coded here
+      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:?set it in the shell or in .env}
+    volumes:
+      - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
+      - /mnt/nvme3n1/swarm/sglang_cache:/root/.cache/sglang
+    ports:
+      - "3000:3000"
+    command: >
+      python3 -m sglang.launch_server
+      --model-path google/gemma-4-26b-a4b-it
+      --tp 2
+      --port 3000
+      --host 0.0.0.0
+      --attention-backend triton
+      --mem-fraction-static 0.8
+      --max-running-requests 128
+      --chunked-prefill-size 4096
+      --context-length 32768
+      --trust-remote-code
+      --enable-piecewise-cuda-graph
+      --schedule-policy lpm
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0', '7']
+              capabilities: [gpu]
+
+  # --- TTS ---
+  # Physical: 7 | Container: 0
+  tts:
+    build: ./swarm-control/indra-tts-server
+    image: swarm-tts
+    depends_on:
+      - persona
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      - PYTHONPATH=/app:/app/Qwen3-TTS
+      - NVIDIA_DRIVER_CAPABILITIES=all
+    volumes:
+      - /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
+      - /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
+    ports:
+      - "8002:8002"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['7']
+              capabilities: [gpu]
+
+
+  # --- STT ---
+  # Physical: 0 | Container: 0
+  stt:
+    image: ghcr.io/speaches-ai/speaches:latest-cuda
+    depends_on:
+      - persona
+    user: "1000:1000" # Explicitly match your host user UID
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      # Use the full HF ID. Speaches will auto-download this to your cache on first boot.
+      - PRELOAD_MODELS=deepdml/faster-whisper-large-v3-turbo-ct2
+    volumes:
+      # Map to the base cache folder
+      - /mnt/nvme3n1/swarm/huggingface_cache:/home/ubuntu/.cache/huggingface
+    ports:
+      - "8005:8000"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
+
+networks:
+  default:
+    name: swarm-network
diff --git a/swarm-control/indra-tts-server/Dockerfile b/swarm-control/indra-tts-server/Dockerfile
new file mode 100644
index 0000000..b30dca9
--- /dev/null
+++ b/swarm-control/indra-tts-server/Dockerfile
@@ -0,0 +1,37 @@
+FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
+
+# Prevent interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# 1. Install Python 3.12 and SoX dependencies
+RUN apt-get update && apt-get install -y software-properties-common && \
+    add-apt-repository ppa:deadsnakes/ppa -y && \
+    apt-get update && apt-get install -y \
+    python3.12 \
+    python3.12-dev \
+    curl \
+    git \
+    libsndfile1 \
+    ffmpeg \
+    sox \
+    libsox-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# 2. 
Use the official bootstrap to install a clean Pip for 3.12
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
+
+WORKDIR /app
+
+# 3. Explicitly install BOTH torch and torchaudio from the cu124 index
+RUN python3.12 -m pip install --no-cache-dir torch==2.6.0 torchaudio --index-url https://download.pytorch.org/whl/cu124
+RUN python3.12 -m pip install --no-cache-dir fastapi uvicorn numpy soundfile
+
+# 4. Install faster-qwen3-tts from PyPI (docker-compose bind-mounts the Qwen3-TTS checkout at /app/Qwen3-TTS)
+RUN python3.12 -m pip install --no-cache-dir faster-qwen3-tts
+
+COPY tts-server.py .
+
+EXPOSE 8002
+CMD ["python3.12", "tts-server.py"]
diff --git a/swarm-control/indra-tts-server/tts-server.py b/swarm-control/indra-tts-server/tts-server.py
new file mode 100644
index 0000000..15c3c4b
--- /dev/null
+++ b/swarm-control/indra-tts-server/tts-server.py
@@ -0,0 +1,110 @@
+"""Indra TTS server: an OpenAI-style ``/v1/audio/speech`` endpoint backed by Qwen3-TTS."""
+import io
+import os
+import wave
+
+import numpy as np
+import torch
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import Response
+from faster_qwen3_tts import FasterQwen3TTS
+from pydantic import BaseModel
+
+# Holds <voice>.wav reference clips and optional <voice>.txt transcripts.
+VOICE_DIR = "/mnt/nvme3n1/swarm/voice-samples"
+
+app = FastAPI(title="Indra tts")
+
+if not torch.cuda.is_available():
+    raise RuntimeError("Mouth cannot find CUDA. Check nvidia-container-toolkit.")
+print(f"Loading model on: {torch.cuda.get_device_name(0)}")
+
+# Load the Base model for high-fidelity mimicry
+model = FasterQwen3TTS.from_pretrained(
+    "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
+    device="cuda:0",  # First visible GPU; the launchers map physical GPU 7 to index 0
+    dtype=torch.bfloat16,
+)
+
+
+class TTSRequest(BaseModel):
+    """Request body accepted by ``POST /v1/audio/speech``."""
+
+    model: str = "tts-1"  # ignored by backend, here to satisfy modelix router
+    input: str
+    voice: str = "oni"  # clip name under VOICE_DIR, without the .wav extension
+    response_format: str = "wav"  # accepted but never read: the reply is always WAV
+    seed: int = 42
+
+
+def _resolve_voice_paths(voice):
+    """Return ``(wav_path, txt_path)`` for *voice*, rejecting names outside VOICE_DIR."""
+    ref_path = os.path.normpath(os.path.join(VOICE_DIR, f"{voice}.wav"))
+    # ``voice`` comes straight from the request body: without this check a name such
+    # as "../../tmp/x" (or an absolute path) would reach files outside VOICE_DIR.
+    if os.path.commonpath([VOICE_DIR, ref_path]) != VOICE_DIR:
+        raise HTTPException(status_code=400, detail="Invalid voice name")
+    if not os.path.isfile(ref_path):
+        raise HTTPException(status_code=404, detail=f"Unknown voice: {voice}")
+    return ref_path, os.path.splitext(ref_path)[0] + ".txt"
+
+
+def _encode_wav(samples, sample_rate):
+    """Pack float samples into the bytes of a mono, 16-bit PCM WAV file."""
+    flat = np.array(samples).flatten()
+    # Clip before scaling: a sample beyond [-1, 1] would otherwise wrap around in
+    # the int16 cast and come out as a loud click.
+    pcm = (np.clip(flat, -1.0, 1.0) * 32767).astype(np.int16)
+
+    wav_io = io.BytesIO()
+    with wave.open(wav_io, "wb") as wav_file:
+        wav_file.setnchannels(1)
+        wav_file.setsampwidth(2)
+        wav_file.setframerate(sample_rate)
+        wav_file.writeframes(pcm.tobytes())
+    return wav_io.getvalue()
+
+
+@app.post("/v1/audio/speech")
+async def generate_speech(request: TTSRequest):
+    """Synthesize ``request.input`` in the requested cloned voice.
+
+    Returns an ``audio/wav`` response. Raises ``HTTPException`` 400 for a voice
+    name that points outside VOICE_DIR, 404 when the reference clip is missing,
+    and 500 (with the error text as detail) for any other failure.
+    """
+    try:
+        ref_path, txt_path = _resolve_voice_paths(request.voice)
+
+        ref_text = None
+        if os.path.exists(txt_path):
+            with open(txt_path, "r", encoding="utf-8") as f:
+                # An empty transcript is treated like a missing one.
+                ref_text = f.read().strip() or None
+
+        # Fix the seed for the persona identity
+        torch.manual_seed(request.seed)
+
+        # Non-streaming call is fine here since it takes <1s on your L40S
+        audio_data, sample_rate = model.generate_voice_clone(
+            text=request.input,
+            language="English",
+            ref_audio=ref_path,
+            ref_text=ref_text,
+            xvec_only=(ref_text is None),
+        )
+
+        wav_bytes = _encode_wav(audio_data, sample_rate)
+        return Response(content=wav_bytes, media_type="audio/wav")
+
+    except HTTPException:
+        # Keep the deliberate 4xx answers; the handler below would turn them into 500s.
+        raise
+    except Exception as e:
+        print(f"Indra Mouth Error: {e}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8002)
diff --git a/swarm-control/persona/Dockerfile b/swarm-control/persona/Dockerfile
new file mode 
100644
index 0000000..c1f3fb9
--- /dev/null
+++ b/swarm-control/persona/Dockerfile
@@ -0,0 +1,12 @@
+FROM lmsysorg/sglang:latest
+
+# 1. Force the upgrade of transformers without triggering pip's strict dependency resolver
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir --upgrade --no-deps \
+    git+https://github.com/huggingface/transformers.git \
+    --break-system-packages
+
+# 2. Inject your working bare-metal SGLang source code directly over the container's default
+COPY local-sglang/python /sgl-workspace/sglang/python
+
+WORKDIR /app
diff --git a/swarm-control/start-persona.sh b/swarm-control/start-persona.sh
new file mode 100755
index 0000000..fe8ebc1
--- /dev/null
+++ b/swarm-control/start-persona.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Node Matali: Gemma-4-26B-A4B-it
+# GPU Mapping: 0, 7
+
+# 1. Point to the BIG drive
+export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
+export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache
+
+# 2. Source the environment
+source /home/isnai/anaconda3/etc/profile.d/conda.sh
+conda activate swarm
+
+export CUDA_VISIBLE_DEVICES=0,7
+export NCCL_P2P_DISABLE=0
+
+# 3. Launch (exec so signals sent to this script reach the server process)
+exec python3 -m sglang.launch_server \
+  --model-path google/gemma-4-26b-a4b-it \
+  --tp 2 \
+  --port 3000 \
+  --host 0.0.0.0 \
+  --attention-backend triton \
+  --mem-fraction-static 0.8 \
+  --max-running-requests 128 \
+  --chunked-prefill-size 4096 \
+  --context-length 32768 \
+  --trust-remote-code \
+  --enable-piecewise-cuda-graph \
+  --schedule-policy lpm
diff --git a/swarm-control/start-tts-qwen.sh b/swarm-control/start-tts-qwen.sh
new file mode 100755
index 0000000..1bb656c
--- /dev/null
+++ b/swarm-control/start-tts-qwen.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# --- Resident Swarm Mouth (Qwen3-TTS 1.7B) ---
+# GPU Mapping: Shared with Node Matali on GPU 7
+
+export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
+source /home/isnai/anaconda3/etc/profile.d/conda.sh
+conda activate swarm-voice
+
+# Explicitly lock to GPU 7
+export CUDA_VISIBLE_DEVICES=7
+export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}/mnt/nvme3n1/swarm/Qwen3-TTS"
+
+echo "--- Launching Resident Swarm Mouth (Port 8002) ---"
+
+# Move to the server directory; abort rather than launch from the wrong place
+cd /mnt/nvme3n1/swarm/swarm-control/indra-tts-server || exit 1
+
+# Launching our Turbo-Mouth server
+# Because CUDA_VISIBLE_DEVICES=7, the server will see GPU 7 as 'cuda:0'
+exec python tts-server.py
diff --git a/swarm-control/start-whisper-stt.sh b/swarm-control/start-whisper-stt.sh
new file mode 100755
index 0000000..205f1ee
--- /dev/null
+++ b/swarm-control/start-whisper-stt.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# --- Environment Setup ---
+export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
+export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache
+
+source /home/isnai/anaconda3/etc/profile.d/conda.sh
+conda activate swarm-voice
+
+export CUDA_VISIBLE_DEVICES=0
+
+echo "--- Launching Resident Swarm Ears (Port 8005) ---"
+
+# 2026 Positional Argument Syntax
+exec faster-whisper-server \
+  --host 0.0.0.0 \
+  --port 8005 \
+  whisper-v4-turbo
diff --git a/voice-samples/aus-female-1.txt b/voice-samples/aus-female-1.txt
new file mode 100644
index 0000000..7eb099c
--- /dev/null
+++ 
b/voice-samples/aus-female-1.txt @@ -0,0 +1 @@ +I'll see about doing that tonight then, since i managed to get all my work commitments out of the way. \ No newline at end of file diff --git a/voice-samples/aus-female-2.txt b/voice-samples/aus-female-2.txt new file mode 100644 index 0000000..93bf108 --- /dev/null +++ b/voice-samples/aus-female-2.txt @@ -0,0 +1 @@ +The three benefiting African countries also contributed to the financing. \ No newline at end of file diff --git a/voice-samples/aus-female-3.txt b/voice-samples/aus-female-3.txt new file mode 100644 index 0000000..ba661c1 --- /dev/null +++ b/voice-samples/aus-female-3.txt @@ -0,0 +1 @@ +The town also has the only covered shopping centre in himerland: hadson boutique centre. diff --git a/voice-samples/aus-female-4.txt b/voice-samples/aus-female-4.txt new file mode 100644 index 0000000..845423e --- /dev/null +++ b/voice-samples/aus-female-4.txt @@ -0,0 +1 @@ +Currently there is only one real English user in the world, somewhere in Lichtenstein. \ No newline at end of file diff --git a/voice-samples/aus-female-5.txt b/voice-samples/aus-female-5.txt new file mode 100644 index 0000000..de59fbf --- /dev/null +++ b/voice-samples/aus-female-5.txt @@ -0,0 +1 @@ +If i could spend a day learning a new hobby, it wouldn't so much be a new hobby. It would be extending a hobby that I have already. \ No newline at end of file diff --git a/voice-samples/aus-female-6.txt b/voice-samples/aus-female-6.txt new file mode 100644 index 0000000..3ff64c4 --- /dev/null +++ b/voice-samples/aus-female-6.txt @@ -0,0 +1 @@ +I really enjoy swimming. I love peace and quiet it gives me. I love the feeling of sun on my back in summer, and being in the cool water. 
\ No newline at end of file diff --git a/voice-samples/aus-male-1.txt b/voice-samples/aus-male-1.txt new file mode 100644 index 0000000..529bba1 --- /dev/null +++ b/voice-samples/aus-male-1.txt @@ -0,0 +1 @@ +She was jealous of the girl with the polish on her nails and the handsome guy at her side. \ No newline at end of file diff --git a/voice-samples/aus-male-2.txt b/voice-samples/aus-male-2.txt new file mode 100644 index 0000000..1d8da7b --- /dev/null +++ b/voice-samples/aus-male-2.txt @@ -0,0 +1 @@ +I was wondering if you could tell me a bit more about what its like to live and work there? \ No newline at end of file diff --git a/voice-samples/aus-male-3.txt b/voice-samples/aus-male-3.txt new file mode 100644 index 0000000..584d37c --- /dev/null +++ b/voice-samples/aus-male-3.txt @@ -0,0 +1 @@ +Absolutely despicable that gingerbread men are forced to live in houses made of their own flesh. \ No newline at end of file diff --git a/voice-samples/aus-male-4.txt b/voice-samples/aus-male-4.txt new file mode 100644 index 0000000..3bdce81 --- /dev/null +++ b/voice-samples/aus-male-4.txt @@ -0,0 +1 @@ +It is a very popular dance at wedding banquets and other parties. \ No newline at end of file diff --git a/voice-samples/aus-male-5.txt b/voice-samples/aus-male-5.txt new file mode 100644 index 0000000..d3164c1 --- /dev/null +++ b/voice-samples/aus-male-5.txt @@ -0,0 +1 @@ +The area beneath these floating mats is exceptionally rich in aquatic life-forms. \ No newline at end of file diff --git a/voice-samples/aus-male-6.txt b/voice-samples/aus-male-6.txt new file mode 100644 index 0000000..b5abb3f --- /dev/null +++ b/voice-samples/aus-male-6.txt @@ -0,0 +1 @@ +Uh the clouds where we live are grey... of course. 
\ No newline at end of file diff --git a/voice-samples/aus-male-7.txt b/voice-samples/aus-male-7.txt new file mode 100644 index 0000000..474c35d --- /dev/null +++ b/voice-samples/aus-male-7.txt @@ -0,0 +1 @@ +The weathers definitely gotten hotter over the last ten years. \ No newline at end of file diff --git a/voice-samples/charter.txt b/voice-samples/charter.txt new file mode 100644 index 0000000..1a25b30 --- /dev/null +++ b/voice-samples/charter.txt @@ -0,0 +1 @@ +Arachne has been watching you. My patrons and I, we watch all deadliest runners. diff --git a/voice-samples/gaius.txt b/voice-samples/gaius.txt new file mode 100644 index 0000000..e607d36 --- /dev/null +++ b/voice-samples/gaius.txt @@ -0,0 +1 @@ +Our mission, my principle directive, is to ensure humanities survival, by meeting its most basic needs. diff --git a/voice-samples/nona.txt b/voice-samples/nona.txt new file mode 100644 index 0000000..5222815 --- /dev/null +++ b/voice-samples/nona.txt @@ -0,0 +1 @@ +I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me. diff --git a/voice-samples/oni.txt b/voice-samples/oni.txt new file mode 100644 index 0000000..1ef2469 --- /dev/null +++ b/voice-samples/oni.txt @@ -0,0 +1 @@ +Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend. diff --git a/voice-samples/scifi/marathon/_gantry.txt b/voice-samples/scifi/marathon/_gantry.txt new file mode 100644 index 0000000..71eba92 --- /dev/null +++ b/voice-samples/scifi/marathon/_gantry.txt @@ -0,0 +1 @@ +We see you. Do you see? Will you join, or sit idly? Here's the pitch, MIDA is revolution. MIDA... 
diff --git a/voice-samples/scifi/marathon/_gantry.wav b/voice-samples/scifi/marathon/_gantry.wav new file mode 100644 index 0000000..b548853 Binary files /dev/null and b/voice-samples/scifi/marathon/_gantry.wav differ diff --git a/voice-samples/scifi/marathon/charter.txt b/voice-samples/scifi/marathon/charter.txt new file mode 100644 index 0000000..1a25b30 --- /dev/null +++ b/voice-samples/scifi/marathon/charter.txt @@ -0,0 +1 @@ +Arachne has been watching you. My patrons and I, we watch all deadliest runners. diff --git a/voice-samples/scifi/marathon/charter.wav b/voice-samples/scifi/marathon/charter.wav new file mode 100644 index 0000000..6f35dd6 Binary files /dev/null and b/voice-samples/scifi/marathon/charter.wav differ diff --git a/voice-samples/scifi/marathon/gaius.txt b/voice-samples/scifi/marathon/gaius.txt new file mode 100644 index 0000000..e607d36 --- /dev/null +++ b/voice-samples/scifi/marathon/gaius.txt @@ -0,0 +1 @@ +Our mission, my principle directive, is to ensure humanities survival, by meeting its most basic needs. diff --git a/voice-samples/scifi/marathon/gaius.wav b/voice-samples/scifi/marathon/gaius.wav new file mode 100644 index 0000000..cd03f18 Binary files /dev/null and b/voice-samples/scifi/marathon/gaius.wav differ diff --git a/voice-samples/scifi/marathon/nona.txt b/voice-samples/scifi/marathon/nona.txt new file mode 100644 index 0000000..5222815 --- /dev/null +++ b/voice-samples/scifi/marathon/nona.txt @@ -0,0 +1 @@ +I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me. 
diff --git a/voice-samples/scifi/marathon/nona.wav b/voice-samples/scifi/marathon/nona.wav new file mode 100644 index 0000000..e2ca5a1 Binary files /dev/null and b/voice-samples/scifi/marathon/nona.wav differ diff --git a/voice-samples/scifi/marathon/oni.txt b/voice-samples/scifi/marathon/oni.txt new file mode 100644 index 0000000..1ef2469 --- /dev/null +++ b/voice-samples/scifi/marathon/oni.txt @@ -0,0 +1 @@ +Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend. diff --git a/voice-samples/scifi/marathon/oni.wav b/voice-samples/scifi/marathon/oni.wav new file mode 100644 index 0000000..1118b7b Binary files /dev/null and b/voice-samples/scifi/marathon/oni.wav differ diff --git a/voice-samples/scifi/marathon/vulcan.txt b/voice-samples/scifi/marathon/vulcan.txt new file mode 100644 index 0000000..c135a4e --- /dev/null +++ b/voice-samples/scifi/marathon/vulcan.txt @@ -0,0 +1 @@ +We are engaging in a value apprasial of the new cascadia colony site, in anticipation of a larger resource extraction initiative. diff --git a/voice-samples/scifi/marathon/vulcan.wav b/voice-samples/scifi/marathon/vulcan.wav new file mode 100644 index 0000000..e488ae6 Binary files /dev/null and b/voice-samples/scifi/marathon/vulcan.wav differ diff --git a/voice-samples/vulcan.txt b/voice-samples/vulcan.txt new file mode 100644 index 0000000..c135a4e --- /dev/null +++ b/voice-samples/vulcan.txt @@ -0,0 +1 @@ +We are engaging in a value apprasial of the new cascadia colony site, in anticipation of a larger resource extraction initiative.