Initial commit: Productionized Swarm with Docker support
This commit is contained in:
19
.gitignore
vendored
Normal file
19
.gitignore
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
# Model caches and weights (very large; never commit)
huggingface_cache/
sglang_cache/
models/
checkpoints/

# Python build artefacts and local environment files
__pycache__/
*.pyc
.venv/
.env

# Large binary audio (only top-level files in voice-samples/ are matched)
voice-samples/*.wav
voice-samples/*.mp3
tts_test.wav

# Local source copies (they are COPY'd into the Docker image at build time)
local-sglang/
|
||||
1
Qwen3-TTS
Submodule
1
Qwen3-TTS
Submodule
Submodule Qwen3-TTS added at 022e286b98
92
docker-compose.yml
Normal file
92
docker-compose.yml
Normal file
@@ -0,0 +1,92 @@
|
||||
services:
  # --- Persona (Gemma-4-26B-A4B-it) ---
  # Physical: 0 & 7 | Container: 0 & 1
  persona:
    build: ./swarm-control/persona
    ipc: host  # Replaces shm_size to avoid shared memory bottlenecks
    ulimits:
      memlock:
        soft: -1
        hard: -1
    environment:
      - CUDA_VISIBLE_DEVICES=0,1  # Corrected for container re-indexing
      - NCCL_P2P_DISABLE=0
      # SECURITY: the token is injected from the shell or the git-ignored .env
      # file; it must never be written into this file. A token was previously
      # committed here in plain text and must be revoked and reissued.
      - "HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:?Set HUGGING_FACE_HUB_TOKEN in .env or the shell}"
    volumes:
      - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
      - /mnt/nvme3n1/swarm/sglang_cache:/root/.cache/sglang
    ports:
      - "3000:3000"
    # Folded scalar: the lines below are joined with spaces into one command.
    command: >
      python3 -m sglang.launch_server
      --model-path google/gemma-4-26b-a4b-it
      --tp 2
      --port 3000
      --host 0.0.0.0
      --attention-backend triton
      --mem-fraction-static 0.8
      --max-running-requests 128
      --chunked-prefill-size 4096
      --context-length 32768
      --trust-remote-code
      --enable-piecewise-cuda-graph
      --schedule-policy lpm
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0', '7']
              capabilities: [gpu]

  # --- TTS ---
  # Physical: 7 | Container: 0
  tts:
    build: ./swarm-control/indra-tts-server
    image: swarm-tts
    depends_on:
      - persona
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - PYTHONPATH=/app:/app/Qwen3-TTS
      - NVIDIA_DRIVER_CAPABILITIES=all
    volumes:
      # Mounted at the same absolute path that tts-server.py reads from.
      - /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
      - /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
    ports:
      - "8002:8002"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['7']
              capabilities: [gpu]

  # --- STT ---
  stt:
    image: ghcr.io/speaches-ai/speaches:latest-cuda
    depends_on:
      - persona
    user: "1000:1000"  # Explicitly match your host user UID
    environment:
      - CUDA_VISIBLE_DEVICES=0
      # Use the full HF ID. Speaches will auto-download this to your cache on first boot.
      - PRELOAD_MODELS=deepdml/faster-whisper-large-v3-turbo-ct2
    volumes:
      # Map to the base cache folder
      - /mnt/nvme3n1/swarm/huggingface_cache:/home/ubuntu/.cache/huggingface
    ports:
      - "8005:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]

networks:
  default:
    name: swarm-network
|
||||
37
swarm-control/indra-tts-server/Dockerfile
Normal file
37
swarm-control/indra-tts-server/Dockerfile
Normal file
@@ -0,0 +1,37 @@
|
||||
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# Prevent interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# 1. Install Python 3.12 (from the deadsnakes PPA) plus the audio tooling
#    (libsndfile, ffmpeg, SoX). The apt lists are removed in the same layer
#    to keep the image small.
RUN apt-get update && apt-get install -y software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get update && apt-get install -y \
        python3.12 \
        python3.12-dev \
        curl \
        git \
        libsndfile1 \
        ffmpeg \
        sox \
        libsox-dev && \
    rm -rf /var/lib/apt/lists/*

# 2. Use the official bootstrap to install a clean Pip for 3.12
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12

WORKDIR /app

# 3. Explicitly install BOTH torch and torchaudio from the cu124 index.
#    torchaudio is pinned to the same release as torch so rebuilds stay
#    reproducible and the pair cannot drift apart.
RUN python3.12 -m pip install --no-cache-dir torch==2.6.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124
RUN python3.12 -m pip install --no-cache-dir fastapi uvicorn numpy soundfile

# 4. Install the faster-qwen3-tts package that tts-server.py imports
RUN python3.12 -m pip install --no-cache-dir faster-qwen3-tts

COPY tts-server.py .

EXPOSE 8002
CMD ["python3.12", "tts-server.py"]
|
||||
80
swarm-control/indra-tts-server/tts-server.py
Normal file
80
swarm-control/indra-tts-server/tts-server.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
import io
|
||||
import wave
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
from faster_qwen3_tts import FasterQwen3TTS
|
||||
|
||||
app = FastAPI(title="Indra tts")

# Fail fast at import time: the server is useless without a CUDA device.
_cuda_ready = torch.cuda.is_available()
if not _cuda_ready:
    raise RuntimeError("Mouth cannot find CUDA. Check nvidia-container-toolkit.")

_gpu_name = torch.cuda.get_device_name(0)
print(f"Loading model on: {_gpu_name}")

# Base (voice-cloning) checkpoint, loaded once at start-up in bfloat16.
# "cuda:0" is the first *visible* device; the launch configs restrict
# visibility so that this is physical GPU 7.
model = FasterQwen3TTS.from_pretrained(
    "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
    device="cuda:0",
    dtype=torch.bfloat16,
)
|
||||
|
||||
# Request body for POST /v1/audio/speech.
class TTSRequest(BaseModel):
    model: str = "tts-1"  # ignored by backend, here to satisfy modelix router
    # Text to synthesise (required).
    input: str
    # Voice name; the handler resolves it to "<voice>.wav" in the voice-samples directory.
    voice: str = "oni"
    # Accepted in the request body; the handler visible in this file always returns WAV.
    response_format: str = "wav"
    # Passed to torch.manual_seed before generation.
    seed: int = 42
|
||||
|
||||
@app.post("/v1/audio/speech")
async def generate_speech(request: TTSRequest):
    """Synthesise ``request.input`` in the cloned voice named by ``request.voice``.

    The voice is resolved to ``<voice>.wav`` under the voice-samples directory.
    If a sibling ``<voice>.txt`` transcript exists it is passed as the reference
    text; otherwise generation runs with ``xvec_only=True``.

    Returns:
        A ``Response`` containing mono 16-bit PCM WAV bytes (``audio/wav``).

    Raises:
        HTTPException 400: empty input, or a voice name that resolves outside
            the voice-samples directory.
        HTTPException 404: no reference audio exists for the requested voice.
        HTTPException 500: any other failure during generation or encoding.
    """
    try:
        if not request.input.strip():
            raise HTTPException(status_code=400, detail="'input' must not be empty")

        base_path = "/mnt/nvme3n1/swarm/voice-samples"
        ref_path = os.path.join(base_path, f"{request.voice}.wav")

        # `voice` is client-controlled: refuse anything ("../x", absolute
        # paths, symlinks) that resolves outside the voice-samples directory.
        # Nested names such as "scifi/marathon/oni" are still allowed.
        base_real = os.path.realpath(base_path)
        ref_real = os.path.realpath(ref_path)
        if os.path.commonpath([base_real, ref_real]) != base_real:
            raise HTTPException(status_code=400, detail="Invalid voice name")
        if not os.path.isfile(ref_real):
            raise HTTPException(
                status_code=404, detail=f"Unknown voice: {request.voice}"
            )

        # Optional transcript of the reference clip, stored next to the audio.
        txt_path = os.path.splitext(ref_path)[0] + ".txt"
        ref_text = None
        if os.path.exists(txt_path):
            with open(txt_path, "r", encoding="utf-8") as f:
                ref_text = f.read().strip()

        # Fix the seed for the persona identity
        torch.manual_seed(request.seed)

        # Non-streaming call; this blocks the event loop for its duration,
        # so requests are processed one at a time.
        audio_data, sample_rate = model.generate_voice_clone(
            text=request.input,
            language="English",
            ref_audio=ref_path,
            ref_text=ref_text,
            xvec_only=(ref_text is None),
        )

        audio_data = np.array(audio_data).flatten()

        # Convert float samples to Int16 for standard WAV compatibility.
        # Clip first: values outside [-1, 1] would otherwise wrap around when
        # cast to int16 and produce loud clicks.
        audio_int16 = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

        wav_io = io.BytesIO()
        with wave.open(wav_io, 'wb') as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(audio_int16.tobytes())

        return Response(content=wav_io.getvalue(), media_type="audio/wav")

    except HTTPException:
        # Deliberate 4xx responses raised above must not be rewritten as 500s.
        raise
    except Exception as e:
        print(f"Indra Mouth Error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
if __name__ == "__main__":
    # Imported lazily so that merely importing this module does not need uvicorn.
    from uvicorn import run as serve_app

    # Listen on all interfaces on port 8002.
    serve_app(app, host="0.0.0.0", port=8002)
|
||||
12
swarm-control/persona/Dockerfile
Normal file
12
swarm-control/persona/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# NOTE(review): both the base image tag (latest) and the transformers source
# (default branch of the git repository) are unpinned, so rebuilds are not
# reproducible.
FROM lmsysorg/sglang:latest

# 1. Upgrade pip, then replace transformers with the current git version.
#    --no-deps skips dependency resolution so the rest of the image's
#    packages are left untouched.
RUN pip install --no-cache-dir --upgrade pip \
 && pip install --no-cache-dir --upgrade --no-deps \
        git+https://github.com/huggingface/transformers.git \
        --break-system-packages

# 2. Overlay the local SGLang Python sources on top of the copy shipped in
#    the image (local-sglang/ must exist in the build context).
COPY local-sglang/python /sgl-workspace/sglang/python

WORKDIR /app
|
||||
29
swarm-control/start-persona.sh
Executable file
29
swarm-control/start-persona.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# Node Matali: Gemma-4-26B-A4B-it
# GPU Mapping: 0, 7
#
# Bare-metal launcher for the SGLang persona server on port 3000.

# 1. Point to the BIG drive
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

# 2. Source the environment
source /home/isnai/anaconda3/etc/profile.d/conda.sh
conda activate swarm

export CUDA_VISIBLE_DEVICES=0,7
export NCCL_P2P_DISABLE=0

# 3. Launch
# Every continued line ends with " \" (space, then backslash). Without the
# space the argument would be glued to the next flag whenever the following
# line is not indented.
python3 -m sglang.launch_server \
    --model-path google/gemma-4-26b-a4b-it \
    --tp 2 \
    --port 3000 \
    --host 0.0.0.0 \
    --attention-backend triton \
    --mem-fraction-static 0.8 \
    --max-running-requests 128 \
    --chunked-prefill-size 4096 \
    --context-length 32768 \
    --trust-remote-code \
    --enable-piecewise-cuda-graph \
    --schedule-policy lpm
|
||||
20
swarm-control/start-tts-qwen.sh
Executable file
20
swarm-control/start-tts-qwen.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# --- Resident Swarm Mouth (Qwen3-TTS 1.7B) ---
# GPU Mapping: Shared with Node Matali on GPU 7

export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
source /home/isnai/anaconda3/etc/profile.d/conda.sh
conda activate swarm-voice

# Explicitly lock to GPU 7
export CUDA_VISIBLE_DEVICES=7
# Append the Qwen3-TTS checkout. The ${VAR:+...} form avoids a leading ':'
# (an empty path entry) when PYTHONPATH was previously unset or empty.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}/mnt/nvme3n1/swarm/Qwen3-TTS"

echo "--- Launching Resident Swarm Mouth (Port 8002) ---"

# Move to the server directory; abort instead of running python from
# whatever directory the script was started in.
cd /mnt/nvme3n1/swarm/swarm-control/indra-tts-server || exit 1

# Launching our Turbo-Mouth server
# Because CUDA_VISIBLE_DEVICES=7, the server will see GPU 7 as 'cuda:0'
python tts-server.py
|
||||
17
swarm-control/start-whisper-stt.sh
Executable file
17
swarm-control/start-whisper-stt.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
# Bare-metal launcher for the speech-to-text server (port 8005, GPU 0).

# Keep caches on the large drive.
export HF_HOME=/mnt/nvme3n1/swarm/huggingface_cache
export SGLANG_CACHE_DIR=/mnt/nvme3n1/swarm/sglang_cache

# Activate the conda environment used by the voice services.
. /home/isnai/anaconda3/etc/profile.d/conda.sh
conda activate swarm-voice

export CUDA_VISIBLE_DEVICES=0

printf '%s\n' "--- Launching Resident Swarm Ears (Port 8005) ---"

# The model name is passed as the trailing positional argument.
# NOTE(review): this name differs from the PRELOAD_MODELS value used by the
# docker-compose stt service - confirm it is the intended model.
faster-whisper-server --host 0.0.0.0 --port 8005 whisper-v4-turbo
|
||||
1
voice-samples/aus-female-1.txt
Normal file
1
voice-samples/aus-female-1.txt
Normal file
@@ -0,0 +1 @@
|
||||
I'll see about doing that tonight then, since I managed to get all my work commitments out of the way.
|
||||
1
voice-samples/aus-female-2.txt
Normal file
1
voice-samples/aus-female-2.txt
Normal file
@@ -0,0 +1 @@
|
||||
The three benefiting African countries also contributed to the financing.
|
||||
1
voice-samples/aus-female-3.txt
Normal file
1
voice-samples/aus-female-3.txt
Normal file
@@ -0,0 +1 @@
|
||||
The town also has the only covered shopping centre in himerland: hadson boutique centre.
|
||||
1
voice-samples/aus-female-4.txt
Normal file
1
voice-samples/aus-female-4.txt
Normal file
@@ -0,0 +1 @@
|
||||
Currently there is only one real English user in the world, somewhere in Liechtenstein.
|
||||
1
voice-samples/aus-female-5.txt
Normal file
1
voice-samples/aus-female-5.txt
Normal file
@@ -0,0 +1 @@
|
||||
If I could spend a day learning a new hobby, it wouldn't so much be a new hobby. It would be extending a hobby that I have already.
|
||||
1
voice-samples/aus-female-6.txt
Normal file
1
voice-samples/aus-female-6.txt
Normal file
@@ -0,0 +1 @@
|
||||
I really enjoy swimming. I love peace and quiet it gives me. I love the feeling of sun on my back in summer, and being in the cool water.
|
||||
1
voice-samples/aus-male-1.txt
Normal file
1
voice-samples/aus-male-1.txt
Normal file
@@ -0,0 +1 @@
|
||||
She was jealous of the girl with the polish on her nails and the handsome guy at her side.
|
||||
1
voice-samples/aus-male-2.txt
Normal file
1
voice-samples/aus-male-2.txt
Normal file
@@ -0,0 +1 @@
|
||||
I was wondering if you could tell me a bit more about what it's like to live and work there?
|
||||
1
voice-samples/aus-male-3.txt
Normal file
1
voice-samples/aus-male-3.txt
Normal file
@@ -0,0 +1 @@
|
||||
Absolutely despicable that gingerbread men are forced to live in houses made of their own flesh.
|
||||
1
voice-samples/aus-male-4.txt
Normal file
1
voice-samples/aus-male-4.txt
Normal file
@@ -0,0 +1 @@
|
||||
It is a very popular dance at wedding banquets and other parties.
|
||||
1
voice-samples/aus-male-5.txt
Normal file
1
voice-samples/aus-male-5.txt
Normal file
@@ -0,0 +1 @@
|
||||
The area beneath these floating mats is exceptionally rich in aquatic life-forms.
|
||||
1
voice-samples/aus-male-6.txt
Normal file
1
voice-samples/aus-male-6.txt
Normal file
@@ -0,0 +1 @@
|
||||
Uh the clouds where we live are grey... of course.
|
||||
1
voice-samples/aus-male-7.txt
Normal file
1
voice-samples/aus-male-7.txt
Normal file
@@ -0,0 +1 @@
|
||||
The weather's definitely gotten hotter over the last ten years.
|
||||
1
voice-samples/charter.txt
Normal file
1
voice-samples/charter.txt
Normal file
@@ -0,0 +1 @@
|
||||
Arachne has been watching you. My patrons and I, we watch all deadliest runners.
|
||||
1
voice-samples/gaius.txt
Normal file
1
voice-samples/gaius.txt
Normal file
@@ -0,0 +1 @@
|
||||
Our mission, my principal directive, is to ensure humanity's survival, by meeting its most basic needs.
|
||||
1
voice-samples/nona.txt
Normal file
1
voice-samples/nona.txt
Normal file
@@ -0,0 +1 @@
|
||||
I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me.
|
||||
1
voice-samples/oni.txt
Normal file
1
voice-samples/oni.txt
Normal file
@@ -0,0 +1 @@
|
||||
Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend.
|
||||
1
voice-samples/scifi/marathon/_gantry.txt
Normal file
1
voice-samples/scifi/marathon/_gantry.txt
Normal file
@@ -0,0 +1 @@
|
||||
We see you. Do you see? Will you join, or sit idly? Here's the pitch, MIDA is revolution. MIDA...
|
||||
BIN
voice-samples/scifi/marathon/_gantry.wav
Normal file
BIN
voice-samples/scifi/marathon/_gantry.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/charter.txt
Normal file
1
voice-samples/scifi/marathon/charter.txt
Normal file
@@ -0,0 +1 @@
|
||||
Arachne has been watching you. My patrons and I, we watch all deadliest runners.
|
||||
BIN
voice-samples/scifi/marathon/charter.wav
Normal file
BIN
voice-samples/scifi/marathon/charter.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/gaius.txt
Normal file
1
voice-samples/scifi/marathon/gaius.txt
Normal file
@@ -0,0 +1 @@
|
||||
Our mission, my principal directive, is to ensure humanity's survival, by meeting its most basic needs.
|
||||
BIN
voice-samples/scifi/marathon/gaius.wav
Normal file
BIN
voice-samples/scifi/marathon/gaius.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/nona.txt
Normal file
1
voice-samples/scifi/marathon/nona.txt
Normal file
@@ -0,0 +1 @@
|
||||
I helped shepherd your consciousness into your very first shell. How serendipitous that you find your way back to me.
|
||||
BIN
voice-samples/scifi/marathon/nona.wav
Normal file
BIN
voice-samples/scifi/marathon/nona.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/oni.txt
Normal file
1
voice-samples/scifi/marathon/oni.txt
Normal file
@@ -0,0 +1 @@
|
||||
Your onboard navigational intelligence. Your consciousness and my neural programming are inexorably interlinked. Think of me as a friend.
|
||||
BIN
voice-samples/scifi/marathon/oni.wav
Normal file
BIN
voice-samples/scifi/marathon/oni.wav
Normal file
Binary file not shown.
1
voice-samples/scifi/marathon/vulcan.txt
Normal file
1
voice-samples/scifi/marathon/vulcan.txt
Normal file
@@ -0,0 +1 @@
|
||||
We are engaging in a value appraisal of the new cascadia colony site, in anticipation of a larger resource extraction initiative.
|
||||
BIN
voice-samples/scifi/marathon/vulcan.wav
Normal file
BIN
voice-samples/scifi/marathon/vulcan.wav
Normal file
Binary file not shown.
1
voice-samples/vulcan.txt
Normal file
1
voice-samples/vulcan.txt
Normal file
@@ -0,0 +1 @@
|
||||
We are engaging in a value appraisal of the new cascadia colony site, in anticipation of a larger resource extraction initiative.
|
||||
Reference in New Issue
Block a user