diff --git a/docker-compose.yml b/docker-compose.yml
index 14d7233..0478980 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -64,7 +64,6 @@ services:
             device_ids: ['7']
             capabilities: [gpu]
-
   # --- STT ---
   stt:
     image: ghcr.io/speaches-ai/speaches:latest-cuda
@@ -88,9 +87,7 @@ services:
             device_ids: ['0']
             capabilities: [gpu]
-
-
-# ==========================================
+  # ==========================================
   # TIER 1: COMMAND LOBE (GPUs 1 & 2)
   # ==========================================
   coder_next:
@@ -114,6 +111,7 @@ services:
       --mem-fraction-static 0.95
       --context-length 32768
       --trust-remote-code
+      --tool-call-parser qwen3_coder
     deploy:
       resources:
         reservations:
@@ -123,28 +121,15 @@ services:
             capabilities: [gpu]
 
   # ==========================================
-  # TIER 2: SWARM LOBE (GPUs 3, 4, 5)
+  # TIER 2: HETEROGENEOUS MODELS (GPUs 3, 4, 5)
   # ==========================================
-  swarm_router:
-    image: lmsysorg/sgl-model-gateway:latest
-    ports:
-      - "4000:4000"
-    command: >
-      --port 4000
-      --worker-urls http://worker_1:3002 http://worker_2:3003 http://worker_3:3004
-      --policy cache_aware
-    depends_on:
-      - worker_1
-      - worker_2
-      - worker_3
-    networks:
-      - default
-  worker_1:
-    image: lmsysorg/sglang:latest
+
+  qwen_27b:
+    image: lmsysorg/sglang:dev
     ipc: host
     environment:
-      - CUDA_VISIBLE_DEVICES=0  # Mapped from physical 3
+      - CUDA_VISIBLE_DEVICES=0,1  # Mapped from physical 3, 4
       - HUGGING_FACE_HUB_TOKEN=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
     volumes:
       - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
@@ -152,35 +137,36 @@ services:
     ports:
       - "3002:3002"
     command: >
-      sglang serve
-      --model-path Qwen/Qwen3.6-35B-A3B-FP8
-      --tp 1
+      python3 -m sglang.launch_server
+      --model-path Qwen/Qwen3.6-27B-FP8
+      --tp 2
       --port 3002
       --host 0.0.0.0
-      --mem-fraction-static 0.9
+      --mem-fraction-static 0.85
       --context-length 131072
-      --kv-cache-dtype fp8_e4m3
-      --allow-auto-truncate
-      --max-running-requests 256
-      --chunked-prefill-size 2048
-      --schedule-policy lpm
-      --tool-call-parser qwen3_coder
-      --reasoning-parser qwen3
-      --hf-chat-template-name tool_use
       --trust-remote-code
+
+    cap_add:
+      - SYS_NICE
+
     deploy:
       resources:
         reservations:
           devices:
             - driver: nvidia
-              device_ids: ['3']
+              device_ids: ['3', '4']
               capabilities: [gpu]
-  worker_2:
-    image: lmsysorg/sglang:latest
+
+  gemma_31b:
+    build: ./swarm-control/persona
     ipc: host
+    cap_add:
+      - SYS_NICE
     environment:
-      - CUDA_VISIBLE_DEVICES=0  # Mapped from physical 4
+      - CUDA_VISIBLE_DEVICES=0,1  # Mapped from physical 5, 6
       - HUGGING_FACE_HUB_TOKEN=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
     volumes:
       - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
@@ -188,68 +174,27 @@ services:
     ports:
       - "3003:3003"
     command: >
-      sglang serve
-      --model-path Qwen/Qwen3.6-35B-A3B-FP8
-      --tp 1
+      python3 -m sglang.launch_server
+      --model-path RedHatAI/gemma-4-31B-it-FP8-Dynamic
+      --tokenizer-path google/gemma-4-31B-it
+      --tp 2
       --port 3003
       --host 0.0.0.0
-      --mem-fraction-static 0.90
-      --context-length 131072
-      --kv-cache-dtype fp8_e4m3
-      --allow-auto-truncate
-      --max-running-requests 256
-      --chunked-prefill-size 2048
-      --schedule-policy lpm
-      --tool-call-parser qwen3_coder
-      --reasoning-parser qwen3
-      --hf-chat-template-name tool_use
+      --mem-fraction-static 0.85
+      --context-length 65536
+      --kv-cache-dtype fp8_e4m3
       --trust-remote-code
+      --tool-call-parser gemma4
     deploy:
       resources:
         reservations:
           devices:
             - driver: nvidia
-              device_ids: ['4']
+              device_ids: ['5', '6']
               capabilities: [gpu]
-
-  worker_3:
-    image: lmsysorg/sglang:latest
-    ipc: host
-    environment:
-      - CUDA_VISIBLE_DEVICES=0  # Mapped from physical 5
-      - HUGGING_FACE_HUB_TOKEN=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
-    volumes:
-      - /mnt/nvme3n1/swarm/huggingface_cache:/root/.cache/huggingface
-      - /mnt/nvme3n1/swarm/sglang_cache:/root/.cache/sglang
-    ports:
-      - "3004:3004"
-    command: >
-      sglang serve
-      --model-path Qwen/Qwen3.6-35B-A3B-FP8
-      --tp 1
-      --port 3004
-      --host 0.0.0.0
-      --mem-fraction-static 0.90
-      --context-length 131072
-      --kv-cache-dtype fp8_e4m3
-      --allow-auto-truncate
-      --max-running-requests 256
-      --chunked-prefill-size 2048
-      --schedule-policy lpm
-      --tool-call-parser qwen3_coder
-      --reasoning-parser qwen3
-      --hf-chat-template-name tool_use
-      --trust-remote-code
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              device_ids: ['5']
-              capabilities: [gpu]
-
-  # ==========================================
-  # TIER 3: MEMORY LOBE (GPU 6)
+
+  # ==========================================
+  # TIER 3: MEMORY (GPU 6)
   # ==========================================
   embeddings:
     image: ghcr.io/huggingface/text-embeddings-inference:latest
@@ -257,7 +202,6 @@ services:
       - CUDA_VISIBLE_DEVICES=0  # Mapped from physical 6
       - HUGGING_FACE_HUB_TOKEN=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
     volumes:
-      # TEI uses /data as its internal cache directory
       - /mnt/nvme3n1/swarm/huggingface_cache:/data
     ports:
       - "8000:8000"
@@ -272,8 +216,6 @@ services:
             device_ids: ['6']
             capabilities: [gpu]
-
-
 # --- Networks ---
 networks: