Initial commit: Productionized Swarm with Docker support

This commit is contained in:
Nathan
2026-04-16 16:46:24 +10:00
committed by Nathan
commit ea4c11e32f
39 changed files with 331 additions and 0 deletions

View File

@@ -0,0 +1,37 @@
# CUDA 12.4 devel base — must match the cu124 PyTorch wheel index used below.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# Prevent interactive prompts during apt installs
ENV DEBIAN_FRONTEND=noninteractive
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Unbuffered stdout/stderr so the server's print() logs appear in
# `docker logs` immediately instead of being held in Python's buffer.
ENV PYTHONUNBUFFERED=1

# 1. Install Python 3.12 (deadsnakes PPA) and the audio toolchain
#    (SoX, ffmpeg, libsndfile) required by the TTS stack.
RUN apt-get update && apt-get install -y software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get update && apt-get install -y \
    python3.12 \
    python3.12-dev \
    curl \
    git \
    libsndfile1 \
    ffmpeg \
    sox \
    libsox-dev && \
    rm -rf /var/lib/apt/lists/*

# 2. Use the official bootstrap to install a clean Pip for 3.12
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12

WORKDIR /app

# 3. Explicitly install BOTH torch and torchaudio from the cu124 index.
#    Kept as its own layer so the multi-GB wheel download is cached
#    independently of the lighter dependencies below.
RUN python3.12 -m pip install --no-cache-dir torch==2.6.0 torchaudio --index-url https://download.pytorch.org/whl/cu124

# 4. API server deps and the local Qwen3-TTS package in one layer.
RUN python3.12 -m pip install --no-cache-dir \
    fastapi uvicorn numpy soundfile \
    faster-qwen3-tts

COPY tts-server.py .

EXPOSE 8002
CMD ["python3.12", "tts-server.py"]

View File

@@ -0,0 +1,80 @@
import os
import torch
import numpy as np
import io
import wave
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel
from faster_qwen3_tts import FasterQwen3TTS
# FastAPI app exposing an OpenAI-compatible /v1/audio/speech endpoint.
app = FastAPI(title="Indra tts")

# Fail fast at startup: this service is GPU-only.
if not torch.cuda.is_available():
    raise RuntimeError("Mouth cannot find CUDA. Check nvidia-container-toolkit.")
print(f"Loading model on: {torch.cuda.get_device_name(0)}")

# Load the Base model for high-fidelity mimicry.
# NOTE(review): "cuda:0" is the first GPU visible *inside the container*;
# which physical GPU that maps to is decided by the container runtime
# (--gpus / NVIDIA_VISIBLE_DEVICES), not here. The original comment claimed
# "GPU 7" — confirm the intended device mapping at deploy time.
model = FasterQwen3TTS.from_pretrained(
    "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
    device="cuda:0",
    dtype=torch.bfloat16
)
class TTSRequest(BaseModel):
    """Request body for POST /v1/audio/speech (OpenAI speech-API shape)."""

    # Accepted but ignored by this backend; present so the modelix router's
    # OpenAI-style payload validates.
    model: str = "tts-1"
    # Text to synthesize.
    input: str
    # Basename of a reference sample (<voice>.wav) in the voice-samples dir.
    voice: str = "oni"
    # NOTE(review): currently unread by the handler — output is always WAV.
    response_format: str = "wav"
    # RNG seed fixed per request so the cloned persona is reproducible.
    seed: int = 42
@app.post("/v1/audio/speech")
async def generate_speech(request: TTSRequest):
    """Synthesize ``request.input`` as a WAV voice-clone of ``request.voice``.

    Looks up ``<voice>.wav`` (and an optional ``<voice>.txt`` reference
    transcript) in the voice-samples directory, runs the cloning model with a
    fixed seed, and returns 16-bit mono PCM WAV bytes.

    Raises:
        HTTPException: 404 when no reference sample exists for the voice;
            500 for any model/inference failure.
    """
    try:
        base_path = "/mnt/nvme3n1/swarm/voice-samples"
        # basename() strips directory components so a crafted voice name
        # (e.g. "../../etc/foo") cannot escape the samples directory.
        voice_file = os.path.basename(f"{request.voice}.wav")
        ref_path = os.path.join(base_path, voice_file)
        if not os.path.exists(ref_path):
            # Unknown voice is a client error, not a model failure.
            raise HTTPException(status_code=404,
                                detail=f"Unknown voice: {request.voice}")

        # Optional transcript of the reference clip; improves clone fidelity.
        txt_path = os.path.splitext(ref_path)[0] + ".txt"
        ref_text = None
        if os.path.exists(txt_path):
            with open(txt_path, "r") as f:
                ref_text = f.read().strip()

        # Fix the seed so the persona identity is reproducible across requests.
        torch.manual_seed(request.seed)

        # Non-streaming call is fine here since it takes <1s on your L40S
        audio_data, sample_rate = model.generate_voice_clone(
            text=request.input,
            language="English",
            ref_audio=ref_path,
            ref_text=ref_text,
            # Fall back to x-vector-only cloning when no transcript exists.
            xvec_only=(ref_text is None)
        )

        audio_data = np.asarray(audio_data).flatten()
        # Clip to [-1, 1] before scaling: out-of-range floats would otherwise
        # wrap around in the int16 cast and produce loud clicks.
        audio_int16 = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

        # Wrap the raw PCM in a standard mono 16-bit WAV container.
        wav_io = io.BytesIO()
        with wave.open(wav_io, 'wb') as wav_file:
            wav_file.setnchannels(1)   # mono
            wav_file.setsampwidth(2)   # 2 bytes = 16-bit samples
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(audio_int16.tobytes())
        return Response(content=wav_io.getvalue(), media_type="audio/wav")
    except HTTPException:
        # Re-raise deliberate HTTP errors (the 404 above) instead of letting
        # the generic handler below rewrap them as 500s.
        raise
    except Exception as e:
        print(f"Indra Mouth Error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Serve on all interfaces; port must match the Dockerfile's EXPOSE 8002.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8002)