Swapped back to qwen3-tts
This commit is contained in:
@@ -27,7 +27,8 @@ services:
|
||||
--mem-fraction-static 0.8
|
||||
--max-running-requests 128
|
||||
--chunked-prefill-size 4096
|
||||
--context-length 32768
|
||||
--context-length 65536
|
||||
--kv-cache-dtype fp8_e4m3
|
||||
--trust-remote-code
|
||||
--enable-piecewise-cuda-graph
|
||||
--schedule-policy lpm
|
||||
@@ -40,20 +41,21 @@ services:
|
||||
device_ids: ['0', '7']
|
||||
capabilities: [gpu]
|
||||
|
||||
# --- TTS ---
|
||||
# Physical: 7 | Container: 0
|
||||
# --- TTS ---
|
||||
tts:
|
||||
build: ./swarm-control/indra-tts-server
|
||||
build:
|
||||
context: . # This allows the build to see the Qwen3-TTS folder at the root
|
||||
dockerfile: ./swarm-control/indra-tts-server/Dockerfile
|
||||
image: swarm-tts
|
||||
depends_on:
|
||||
- persona
|
||||
environment:
|
||||
- CUDA_VISIBLE_DEVICES=0
|
||||
- PYTHONPATH=/app:/app/Qwen3-TTS
|
||||
- PYTHONPATH=/app:/app/Qwen3-TTS # Keep this so the app finds the local code
|
||||
- NVIDIA_DRIVER_CAPABILITIES=all
|
||||
volumes:
|
||||
- /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
|
||||
- /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
|
||||
- /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS # Keep this for live code edits
|
||||
ports:
|
||||
- "8002:8002"
|
||||
deploy:
|
||||
@@ -109,7 +111,7 @@ services:
|
||||
--host 0.0.0.0
|
||||
--hf-chat-template-name tool_use
|
||||
--mem-fraction-static 0.95
|
||||
--context-length 32768
|
||||
--context-length 131072
|
||||
--trust-remote-code
|
||||
--tool-call-parser qwen3_coder
|
||||
deploy:
|
||||
@@ -180,8 +182,8 @@ services:
|
||||
--tp 2
|
||||
--port 3003
|
||||
--host 0.0.0.0
|
||||
--mem-fraction-static 0.85
|
||||
--context-length 65536
|
||||
--mem-fraction-static 0.80
|
||||
--context-length 131072
|
||||
--kv-cache-dtype fp8_e4m3
|
||||
--trust-remote-code
|
||||
--tool-call-parser gemma4
|
||||
@@ -206,7 +208,7 @@ services:
|
||||
ports:
|
||||
- "8000:8000"
|
||||
command: >
|
||||
--model-id jinaai/jina-embeddings-v2-base-code
|
||||
--model-id google/embeddinggemma-300m
|
||||
--max-client-batch-size 1024
|
||||
deploy:
|
||||
resources:
|
||||
|
||||
Reference in New Issue
Block a user