Initial commit: Productionized Swarm with Docker support
This commit is contained in:
19
.gitignore
vendored
Normal file
19
.gitignore
vendored
Normal file
@@ -0,0 +1,19 @@
# Model Caches (Massive)
huggingface_cache/
sglang_cache/
models/
checkpoints/

# Local Python & Environment
__pycache__/
*.pyc
.venv/
.env

# Large Binary Data
voice-samples/*.wav
voice-samples/*.mp3
tts_test.wav

# Local source copies (since we COPY them in Docker)
local-sglang/
1
Qwen3-TTS
Submodule
1
Qwen3-TTS
Submodule
Submodule Qwen3-TTS added at 022e286b98
92
docker-compose.yml
Normal file
92
docker-compose.yml
Normal file
@@ -0,0 +1,92 @@
services:
  # --- Persona (Gemma-4-26B-A4B-it) ---
  # Physical GPUs: 0 & 7 | Container sees them as: 0 & 1
  persona:
    build: ./swarm-control/persona
    ipc: host # Replaces shm_size to avoid shared memory bottlenecks
    ulimits:
      memlock:
        soft: -1
        hard: -1
    environment:
      - CUDA_VISIBLE_DEVICES=0,1 # Corrected for container re-indexing
      - NCCL_P2P_DISABLE=0
      # SECURITY: the previous hard-coded hf_... token was committed to git
      # and must be revoked on huggingface.co. Supply the token from the host
      # environment or a git-ignored .env file instead of baking it in here.
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
    volumes:
      - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
      - /mnt/nvme3n1/swarm/sglang_cache:/root/.cache/sglang
    ports:
      - "3000:3000"
    # Keep these flags in sync with swarm-control/start-persona.sh
    command: >
      python3 -m sglang.launch_server
      --model-path google/gemma-4-26b-a4b-it
      --tp 2
      --port 3000
      --host 0.0.0.0
      --attention-backend triton
      --mem-fraction-static 0.8
      --max-running-requests 128
      --chunked-prefill-size 4096
      --context-length 32768
      --trust-remote-code
      --enable-piecewise-cuda-graph
      --schedule-policy lpm
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0', '7']
              capabilities: [gpu]

  # --- TTS ---
  # Physical GPU: 7 | Container sees it as: 0
  tts:
    build: ./swarm-control/indra-tts-server
    image: swarm-tts
    depends_on:
      - persona
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - PYTHONPATH=/app:/app/Qwen3-TTS
      - NVIDIA_DRIVER_CAPABILITIES=all
    volumes:
      # Read-only: the server only reads reference clips, never writes them.
      - /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
      - /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
    ports:
      - "8002:8002"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['7']
              capabilities: [gpu]

  # --- STT ---
  stt:
    image: ghcr.io/speaches-ai/speaches:latest-cuda
    depends_on:
      - persona
    user: "1000:1000" # Explicitly match your host user UID
    environment:
      - CUDA_VISIBLE_DEVICES=0
      # Use the full HF ID. Speaches will auto-download this to your cache on first boot.
      - PRELOAD_MODELS=deepdml/faster-whisper-large-v3-turbo-ct2
    volumes:
      # Map to the base cache folder
      - /mnt/nvme3n1/swarm/huggingface_cache:/home/ubuntu/.cache/huggingface
    ports:
      - "8005:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]

networks:
  default:
    name: swarm-network
37
swarm-control/indra-tts-server/Dockerfile
Normal file
37
swarm-control/indra-tts-server/Dockerfile
Normal file
@@ -0,0 +1,37 @@
# syntax=docker/dockerfile:1
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# Build-time only: suppress interactive apt prompts without leaking the
# setting into the runtime environment (ARG, not ENV — hadolint/DL best practice).
ARG DEBIAN_FRONTEND=noninteractive

ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# 1. Install Python 3.12 (deadsnakes PPA) plus audio/codec dependencies.
#    update + install combined per layer, apt lists removed in the same layer
#    so the package cache never persists into the image.
RUN apt-get update && apt-get install -y software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get update && apt-get install -y \
        curl \
        ffmpeg \
        git \
        libsndfile1 \
        libsox-dev \
        python3.12 \
        python3.12-dev \
        sox && \
    rm -rf /var/lib/apt/lists/*

# 2. Use the official bootstrap to install a clean pip for Python 3.12
#    (the deadsnakes package does not ship pip).
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12

WORKDIR /app

# 3. Explicitly install BOTH torch and torchaudio from the cu124 index first,
#    so later packages resolve against the pinned CUDA build.
RUN python3.12 -m pip install --no-cache-dir torch==2.6.0 torchaudio --index-url https://download.pytorch.org/whl/cu124
RUN python3.12 -m pip install --no-cache-dir fastapi uvicorn numpy soundfile

# 4. Install the Qwen3-TTS inference wrapper.
RUN python3.12 -m pip install --no-cache-dir faster-qwen3-tts

COPY tts-server.py .

EXPOSE 8002
CMD ["python3.12", "tts-server.py"]
80
swarm-control/indra-tts-server/tts-server.py
Normal file
80
swarm-control/indra-tts-server/tts-server.py
Normal file
@@ -0,0 +1,80 @@
import io
import os
import wave

import numpy as np
import torch
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel

from faster_qwen3_tts import FasterQwen3TTS

app = FastAPI(title="Indra tts")

# Fail fast: this service is useless without a GPU, so surface a clear
# startup error rather than a cryptic failure on the first request.
if not torch.cuda.is_available():
    raise RuntimeError("Mouth cannot find CUDA. Check nvidia-container-toolkit.")
print(f"Loading model on: {torch.cuda.get_device_name(0)}")

# Base (non-instruct) checkpoint for high-fidelity voice mimicry.
# "cuda:0" is the first *visible* device; with CUDA_VISIBLE_DEVICES=7 set by
# the launcher, that is physical GPU 7.
model = FasterQwen3TTS.from_pretrained(
    "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
    device="cuda:0",
    dtype=torch.bfloat16,
)
class TTSRequest(BaseModel):
    """OpenAI-compatible request payload for /v1/audio/speech."""

    # Accepted (and ignored) so OpenAI-style routers (modelix) can pass a model name.
    model: str = "tts-1"
    # Text to synthesize.
    input: str
    # Reference-voice basename; resolved to <voice>.wav under voice-samples/.
    voice: str = "oni"
    # Hint only — the backend always emits WAV.
    response_format: str = "wav"
    # Fixed seed keeps the persona's delivery deterministic across calls.
    seed: int = 42
@app.post("/v1/audio/speech")
|
||||||
|
async def generate_speech(request: TTSRequest):
|
||||||
|
try:
|
||||||
|
voice_file = f"{request.voice}.wav"
|
||||||
|
base_path = "/mnt/nvme3n1/swarm/voice-samples"
|
||||||
|
ref_path = os.path.join(base_path, voice_file)
|
||||||
|
txt_path = os.path.splitext(ref_path)[0] + ".txt"
|
||||||
|
|
||||||
|
ref_text = None
|
||||||
|
if os.path.exists(txt_path):
|
||||||
|
with open(txt_path, "r") as f:
|
||||||
|
ref_text = f.read().strip()
|
||||||
|
|
||||||
|
# Fix the seed for the persona identity
|
||||||
|
torch.manual_seed(request.seed)
|
||||||
|
|
||||||
|
full_audio = []
|
||||||
|
# Non-streaming call is fine here since it takes <1s on your L40S
|
||||||
|
audio_data, sample_rate = model.generate_voice_clone(
|
||||||
|
text=request.input,
|
||||||
|
language="English",
|
||||||
|
ref_audio=ref_path,
|
||||||
|
ref_text=ref_text,
|
||||||
|
xvec_only=(ref_text is None)
|
||||||
|
)
|
||||||
|
|
||||||
|
audio_data = np.array(audio_data)
|
||||||
|
|
||||||
|
audio_data = audio_data.flatten()
|
||||||
|
|
||||||
|
# Convert Float32 to Int16 for standard WAV compatibility
|
||||||
|
audio_int16 = (audio_data * 32767).astype(np.int16)
|
||||||
|
|
||||||
|
wav_io = io.BytesIO()
|
||||||
|
with wave.open(wav_io, 'wb') as wav_file:
|
||||||
|
wav_file.setnchannels(1)
|
||||||
|
wav_file.setsampwidth(2)
|
||||||
|
wav_file.setframerate(sample_rate)
|
||||||
|
wav_file.writeframes(audio_int16.tobytes())
|
||||||
|
|
||||||
|
wav_io.seek(0)
|
||||||
|
return Response(content=wav_io.getvalue(), media_type="audio/wav")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Indra Mouth Error: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Imported lazily so importing this module does not require uvicorn.
    import uvicorn

    # Port 8002 matches the Dockerfile EXPOSE and the compose port mapping.
    uvicorn.run(app, host="0.0.0.0", port=8002)
12
swarm-control/persona/Dockerfile
Normal file
12
swarm-control/persona/Dockerfile
Normal file
@@ -0,0 +1,12 @@
# syntax=docker/dockerfile:1
# `latest` is unreproducible (hadolint DL3007): parameterized so builds can
# pin a release tag or digest, e.g. --build-arg SGLANG_TAG=v0.4.x
ARG SGLANG_TAG=latest
FROM lmsysorg/sglang:${SGLANG_TAG}

# 1. Force the upgrade of transformers without triggering pip's strict
#    dependency resolver (--no-deps keeps the base image's other pins intact).
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade --no-deps \
    git+https://github.com/huggingface/transformers.git \
    --break-system-packages

# 2. Inject the working bare-metal SGLang source directly over the container's default.
COPY local-sglang/python /sgl-workspace/sglang/python

WORKDIR /app
29
swarm-control/start-persona.sh
Executable file
29
swarm-control/start-persona.sh
Executable file
@@ -0,0 +1,29 @@
#!/bin/bash
# Node Matali: Gemma-4-26B-A4B-it
# GPU Mapping: physical GPUs 0 and 7 (tensor-parallel pair)
# NOTE: no `set -u` — conda's activation scripts reference unset variables.
set -eo pipefail

# 1. Point caches at the big NVMe drive
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

# 2. Source the environment
source /home/isnai/anaconda3/etc/profile.d/conda.sh
conda activate swarm

export CUDA_VISIBLE_DEVICES=0,7
export NCCL_P2P_DISABLE=0

# 3. Launch — flags mirror the `persona` service in docker-compose.yml
python3 -m sglang.launch_server \
    --model-path google/gemma-4-26b-a4b-it \
    --tp 2 \
    --port 3000 \
    --host 0.0.0.0 \
    --attention-backend triton \
    --mem-fraction-static 0.8 \
    --max-running-requests 128 \
    --chunked-prefill-size 4096 \
    --context-length 32768 \
    --trust-remote-code \
    --enable-piecewise-cuda-graph \
    --schedule-policy lpm
20
swarm-control/start-tts-qwen.sh
Executable file
20
swarm-control/start-tts-qwen.sh
Executable file
@@ -0,0 +1,20 @@
#!/bin/bash
# --- Resident Swarm Mouth (Qwen3-TTS 1.7B) ---
# GPU Mapping: shared with Node Matali on physical GPU 7
# NOTE: no `set -u` — conda's activation scripts reference unset variables.
set -e

export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
source /home/isnai/anaconda3/etc/profile.d/conda.sh
conda activate swarm-voice

# Explicitly lock to GPU 7
export CUDA_VISIBLE_DEVICES=7
export PYTHONPATH=$PYTHONPATH:/mnt/nvme3n1/swarm/Qwen3-TTS

echo "--- Launching Resident Swarm Mouth (Port 8002) ---"

# Move to the server directory; abort rather than launch from the wrong cwd
# (the server resolves tts-server.py relative to this directory).
cd /mnt/nvme3n1/swarm/swarm-control/indra-tts-server || exit 1

# Launching our Turbo-Mouth server
# Because CUDA_VISIBLE_DEVICES=7, the server will see GPU 7 as 'cuda:0'
python tts-server.py
17
swarm-control/start-whisper-stt.sh
Executable file
17
swarm-control/start-whisper-stt.sh
Executable file
@@ -0,0 +1,17 @@
#!/bin/bash
# --- Resident Swarm Ears (faster-whisper STT, port 8005) ---
# NOTE: no `set -u` — conda's activation scripts reference unset variables.
set -e

# --- Environment Setup ---
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

source /home/isnai/anaconda3/etc/profile.d/conda.sh
conda activate swarm-voice

export CUDA_VISIBLE_DEVICES=0

echo "--- Launching Resident Swarm Ears (Port 8005) ---"

# 2026 Positional Argument Syntax: the model id is a positional argument.
faster-whisper-server \
    --host 0.0.0.0 \
    --port 8005 \
    whisper-v4-turbo
1
voice-samples/aus-female-1.txt
Normal file
1
voice-samples/aus-female-1.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
I'll see about doing that tonight then, since i managed to get all my work commitments out of the way.
|
||||||
1
voice-samples/aus-female-2.txt
Normal file
1
voice-samples/aus-female-2.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
The three benefiting African countries also contributed to the financing.
|
||||||
1
voice-samples/aus-female-3.txt
Normal file
1
voice-samples/aus-female-3.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
The town also has the only covered shopping centre in himerland: hadson boutique centre.
|
||||||
1
voice-samples/aus-female-4.txt
Normal file
1
voice-samples/aus-female-4.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Currently there is only one real English user in the world, somewhere in Lichtenstein.
|
||||||
1
voice-samples/aus-female-5.txt
Normal file
1
voice-samples/aus-female-5.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
If i could spend a day learning a new hobby, it wouldn't so much be a new hobby. It would be extending a hobby that I have already.
|
||||||
1
voice-samples/aus-female-6.txt
Normal file
1
voice-samples/aus-female-6.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
I really enjoy swimming. I love peace and quiet it gives me. I love the feeling of sun on my back in summer, and being in the cool water.
|
||||||
1
voice-samples/aus-male-1.txt
Normal file
1
voice-samples/aus-male-1.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
She was jealous of the girl with the polish on her nails and the handsome guy at her side.
|
||||||
1
voice-samples/aus-male-2.txt
Normal file
1
voice-samples/aus-male-2.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
I was wondering if you could tell me a bit more about what its like to live and work there?
|
||||||
1
voice-samples/aus-male-3.txt
Normal file
1
voice-samples/aus-male-3.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Absolutely despicable that gingerbread men are forced to live in houses made of their own flesh.
|
||||||
1
voice-samples/aus-male-4.txt
Normal file
1
voice-samples/aus-male-4.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
It is a very popular dance at wedding banquets and other parties.
|
||||||
1
voice-samples/aus-male-5.txt
Normal file
1
voice-samples/aus-male-5.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
The area beneath these floating mats is exceptionally rich in aquatic life-forms.
|
||||||
1
voice-samples/aus-male-6.txt
Normal file
1
voice-samples/aus-male-6.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Uh the clouds where we live are grey... of course.
|
||||||
1
voice-samples/aus-male-7.txt
Normal file
1
voice-samples/aus-male-7.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
The weathers definitely gotten hotter over the last ten years.
|
||||||
1
voice-samples/charter.txt
Normal file
1
voice-samples/charter.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Arachne has been watching you. My patrons and I, we watch all deadliest runners.
|
||||||
1
voice-samples/gaius.txt
Normal file
1
voice-samples/gaius.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Our mission, my principle directive, is to ensure humanities survival, by meeting its most basic needs.
|
||||||
1
voice-samples/nona.txt
Normal file
1
voice-samples/nona.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me.
|
||||||
1
voice-samples/oni.txt
Normal file
1
voice-samples/oni.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend.
|
||||||
1
voice-samples/scifi/marathon/_gantry.txt
Normal file
1
voice-samples/scifi/marathon/_gantry.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
We see you. Do you see? Will you join, or sit idly? Here's the pitch, MIDA is revolution. MIDA...
|
||||||
BIN
voice-samples/scifi/marathon/_gantry.wav
Normal file
BIN
voice-samples/scifi/marathon/_gantry.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/charter.txt
Normal file
1
voice-samples/scifi/marathon/charter.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Arachne has been watching you. My patrons and I, we watch all deadliest runners.
|
||||||
BIN
voice-samples/scifi/marathon/charter.wav
Normal file
BIN
voice-samples/scifi/marathon/charter.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/gaius.txt
Normal file
1
voice-samples/scifi/marathon/gaius.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Our mission, my principle directive, is to ensure humanities survival, by meeting its most basic needs.
|
||||||
BIN
voice-samples/scifi/marathon/gaius.wav
Normal file
BIN
voice-samples/scifi/marathon/gaius.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/nona.txt
Normal file
1
voice-samples/scifi/marathon/nona.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me.
|
||||||
BIN
voice-samples/scifi/marathon/nona.wav
Normal file
BIN
voice-samples/scifi/marathon/nona.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/oni.txt
Normal file
1
voice-samples/scifi/marathon/oni.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend.
|
||||||
BIN
voice-samples/scifi/marathon/oni.wav
Normal file
BIN
voice-samples/scifi/marathon/oni.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/vulcan.txt
Normal file
1
voice-samples/scifi/marathon/vulcan.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
We are engaging in a value apprasial of the new cascadia colony site, in anticipation of a larger resource extraction initiative.
|
||||||
BIN
voice-samples/scifi/marathon/vulcan.wav
Normal file
BIN
voice-samples/scifi/marathon/vulcan.wav
Normal file
Binary file not shown.
1
voice-samples/vulcan.txt
Normal file
1
voice-samples/vulcan.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
We are engaging in a value apprasial of the new cascadia colony site, in anticipation of a larger resource extraction initiative.
|
||||||
Reference in New Issue
Block a user