Swapped back to qwen3-tts

This commit is contained in:
2026-05-05 16:42:49 +10:00
parent e90d2b1ec2
commit 109084e8e4
3 changed files with 100 additions and 78 deletions

View File

@@ -27,7 +27,8 @@ services:
--mem-fraction-static 0.8
--max-running-requests 128
--chunked-prefill-size 4096
--context-length 32768
--context-length 65536
--kv-cache-dtype fp8_e4m3
--trust-remote-code
--enable-piecewise-cuda-graph
--schedule-policy lpm
@@ -40,20 +41,21 @@ services:
device_ids: ['0', '7']
capabilities: [gpu]
# --- TTS ---
# Physical: 7 | Container: 0
# --- TTS ---
tts:
build: ./swarm-control/indra-tts-server
build:
context: . # Use the repo root as build context so the build can see the top-level Qwen3-TTS folder
dockerfile: ./swarm-control/indra-tts-server/Dockerfile
image: swarm-tts
depends_on:
- persona
environment:
- CUDA_VISIBLE_DEVICES=0
- PYTHONPATH=/app:/app/Qwen3-TTS
- PYTHONPATH=/app:/app/Qwen3-TTS # Keep /app/Qwen3-TTS on the path so the app can import the local Qwen3-TTS code
- NVIDIA_DRIVER_CAPABILITIES=all
volumes:
- /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
- /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
- /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS # Keep this bind mount so live edits to the host checkout are visible in the container
ports:
- "8002:8002"
deploy:
@@ -109,7 +111,7 @@ services:
--host 0.0.0.0
--hf-chat-template-name tool_use
--mem-fraction-static 0.95
--context-length 32768
--context-length 131072
--trust-remote-code
--tool-call-parser qwen3_coder
deploy:
@@ -180,8 +182,8 @@ services:
--tp 2
--port 3003
--host 0.0.0.0
--mem-fraction-static 0.85
--context-length 65536
--mem-fraction-static 0.80
--context-length 131072
--kv-cache-dtype fp8_e4m3
--trust-remote-code
--tool-call-parser gemma4
@@ -206,7 +208,7 @@ services:
ports:
- "8000:8000"
command: >
--model-id jinaai/jina-embeddings-v2-base-code
--model-id google/embeddinggemma-300m
--max-client-batch-size 1024
deploy:
resources: