Swapped back to qwen3-tts

2026-05-05 16:42:49 +10:00
parent e90d2b1ec2
commit 109084e8e4
3 changed files with 100 additions and 78 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -27,7 +27,8 @@ services:
      --mem-fraction-static 0.8
      --max-running-requests 128
      --chunked-prefill-size 4096
-      --context-length 32768
+      --context-length 65536
+      --kv-cache-dtype fp8_e4m3
      --trust-remote-code
      --enable-piecewise-cuda-graph
      --schedule-policy lpm
@@ -40,20 +41,21 @@ services:
              device_ids: ['0', '7']
              capabilities: [gpu]

-  # --- TTS ---
-  # Physical: 7 | Container: 0
+  # --- TTS ---
  tts:
-    build: ./swarm-control/indra-tts-server
+    build:
+      context: .  # This allows the build to see the Qwen3-TTS folder at the root
+      dockerfile: ./swarm-control/indra-tts-server/Dockerfile
    image: swarm-tts
    depends_on:
      - persona
    environment:
      - CUDA_VISIBLE_DEVICES=0
-      - PYTHONPATH=/app:/app/Qwen3-TTS
+      - PYTHONPATH=/app:/app/Qwen3-TTS  # Keep: lets the app import the Qwen3-TTS code mounted at /app/Qwen3-TTS
      - NVIDIA_DRIVER_CAPABILITIES=all
    volumes:
      - /mnt/nvme3n1/swarm/voice-samples:/mnt/nvme3n1/swarm/voice-samples:ro
-      - /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS
+      - /mnt/nvme3n1/swarm/Qwen3-TTS:/app/Qwen3-TTS  # Keep: bind mount so host-side code edits reach the container without an image rebuild
    ports:
      - "8002:8002"
    deploy:
@@ -109,7 +111,7 @@ services:
      --host 0.0.0.0
      --hf-chat-template-name tool_use
      --mem-fraction-static 0.95
-      --context-length 32768
+      --context-length 131072
      --trust-remote-code
      --tool-call-parser qwen3_coder
    deploy:
@@ -180,8 +182,8 @@ services:
      --tp 2
      --port 3003
      --host 0.0.0.0
-      --mem-fraction-static 0.85
-      --context-length 65536     
+      --mem-fraction-static 0.80
+      --context-length 131072     
      --kv-cache-dtype fp8_e4m3  
      --trust-remote-code
      --tool-call-parser gemma4
@@ -206,7 +208,7 @@ services:
    ports:
      - "8000:8000"
    command: >
-      --model-id jinaai/jina-embeddings-v2-base-code
+      --model-id google/embeddinggemma-300m
      --max-client-batch-size 1024
    deploy:
      resources: