结构图（如有误请纠正）
```mermaid
graph TD
    %% Overview: data uploaded by the user passes through the Gateway, the Matting
    %% cluster and the Comparison cluster; edge labels 1-18 number the steps in order.
    %% User and gateway
    User((用户)) -->|"上传数据"| Gateway[API Gateway<br/>入口 + 调度器]
    
    %% Gateway scheduling logic
    Gateway -->|"1. 存储原始数据"| Folder1
    Gateway -->|"2. 创建任务记录"| SQLite[(SQLite<br/>任务状态数据库)]
    %% NOTE(review): no separate queue service appears in the docker-compose section, and the
    %% sequence diagram picks pending tasks straight from SQLite - confirm what TaskQueue stands for.
    Gateway -->|"3. 数据切片分发"| TaskQueue{任务队列}
    
    %% Shared storage subgraph: the three data folders and the SQLite node are grouped under one volume
    subgraph Storage["Docker Shared Volume (/app/data)"]
        Folder1["/input<br/>(100G 原始数据)"]
        Folder2["/matting_output<br/>(抠图中间结果)"]
        Folder3["/final_results<br/>(对比最终结果)"]
        %% SQLite gets its shape on the Gateway edge above; listing it here places it in this subgraph
        SQLite
    end
    
    %% Task dispatch to Matting
    TaskQueue -->|"4. POST /matting {task_id}"| MattingAPI[统一 Matting API]
    
    %% Matting service cluster (the edge below targets the subgraph as a whole)
    MattingAPI -->|"5. 分发任务"| MattingCluster
    subgraph MattingCluster["Matting Service 集群<br/>(共享 GPU)"]
        M1[Matting Worker 1]
        M2[Matting Worker 2]
        M3[Matting Worker N]
    end
    
    %% Matting processing flow: read a chunk, mark processing, write the result, mark matting_done
    MattingCluster -->|"6. 读取分片"| Folder1
    MattingCluster -->|"7. 更新状态<br/>(processing)"| SQLite
    MattingCluster -->|"8. 写入结果"| Folder2
    MattingCluster -->|"9. 更新状态<br/>(matting_done)"| SQLite
    
    %% Gateway polling and scheduling (dotted edge: status read back from SQLite)
    SQLite -.->|"10. 轮询检测<br/>matting_done"| Gateway
    Gateway -->|"11. POST /compare {task_id}"| ComparisonAPI[统一 Comparison API]
    
    %% Comparison service cluster (the edge below targets the subgraph as a whole)
    ComparisonAPI -->|"12. 分发任务"| ComparisonCluster
    subgraph ComparisonCluster["Comparison Service 集群<br/>(共享 GPU)"]
        C1[Comparison Worker 1]
        C2[Comparison Worker 2]
        C3[Comparison Worker N]
    end
    
    %% Comparison processing flow: read the matting output, mark comparing, write the result, mark completed
    ComparisonCluster -->|"13. 读取抠图结果"| Folder2
    ComparisonCluster -->|"14. 更新状态<br/>(comparing)"| SQLite
    ComparisonCluster -->|"15. 写入对比结果"| Folder3
    ComparisonCluster -->|"16. 更新状态<br/>(completed)"| SQLite
    
    %% Final return to the user once the completed status is seen
    SQLite -.->|"17. 检测 completed"| Gateway
    Gateway -->|"18. 返回最终结果"| User
    
    %% Styling - colours tuned for a dark background
    %% Shared looks for the repeated node groups
    classDef mattingWorker fill:#0288d1,stroke:#4fc3f7,color:#fff
    classDef comparisonWorker fill:#e64a19,stroke:#ff7043,color:#fff
    classDef storageFolder fill:#424242,stroke:#9e9e9e,stroke-width:2px,color:#e0e0e0
    class M1,M2,M3 mattingWorker
    class C1,C2,C3 comparisonWorker
    class Folder1,Folder2,Folder3 storageFolder
    %% Subgraph containers
    style Storage fill:#1b5e20,stroke:#4caf50,stroke-width:2px,color:#fff
    style MattingCluster fill:#0277bd,stroke:#0288d1,color:#fff
    style ComparisonCluster fill:#d84315,stroke:#e64a19,color:#fff
    %% One-off nodes
    style User fill:#5e35b1,stroke:#9575cd,stroke-width:2px,color:#fff
    style Gateway fill:#1e88e5,stroke:#64b5f6,stroke-width:3px,color:#fff
    style TaskQueue fill:#ffa726,stroke:#ffb74d,stroke-width:2px,color:#000
    style SQLite fill:#ab47bc,stroke:#ce93d8,stroke-width:2px,color:#fff
    style MattingAPI fill:#26c6da,stroke:#4dd0e1,stroke-width:2px,color:#000
    style ComparisonAPI fill:#ff7043,stroke:#ff8a65,stroke-width:2px,color:#fff
```

序列图（如有误请纠正）

```mermaid
%%{init: {'theme':'dark'}}%%
sequenceDiagram
    %% autonumber puts a running sequence number in front of every message
    autonumber
    %% Participants: short alias on the left, rendered label on the right
    participant User as 用户
    participant GW as Gateway<br/>(入口+调度)
    participant DB as SQLite<br/>(任务数据库)
    participant FS as 共享存储<br/>(/app/data)
    participant MAPI as Matting API
    participant MW as Matting Worker
    participant CAPI as Comparison API
    participant CW as Comparison Worker

    %% Phase 1: upload, chunking, and task creation
    rect rgb(30, 60, 90)
        Note over User,GW: 阶段1: 数据上传与任务创建
        User->>+GW: POST /upload (100GB 原始数据)
        GW->>FS: 保存到 /input/raw_data.zip
        GW->>GW: 数据切片 (chunk_001 ~ chunk_N)
        GW->>FS: 保存切片到 /input/chunks/
        %% Every task row stores job_id: the /status and /download queries later in this
        %% diagram filter on job_id, so the rows must carry it from the moment they are created.
        loop 为每个切片创建任务
            GW->>DB: INSERT task (job_id, task_id, status='pending')
        end
        %% The closing reply ends the Gateway activation opened by the upload request
        GW-->>-User: 返回 job_id={uuid}
    end

    %% Phase 2: the Gateway actively dispatches Matting tasks
    rect rgb(0, 60, 80)
        Note over GW,MW: 阶段2: Gateway 调度 Matting 任务
        %% One loop iteration follows a single pending task from selection to the Matting reply
        loop 遍历所有待处理任务
            GW->>DB: SELECT task WHERE status='pending' LIMIT 1
            DB-->>GW: 返回 task_id='chunk_001'
            %% The row is marked as dispatched before the request goes out to the Matting API
            GW->>DB: UPDATE status='dispatched_matting'
            
            %% The + and - suffixes open and close the activation bars of MAPI and MW
            GW->>+MAPI: POST /matting<br/>{task_id: 'chunk_001', input_path: '/input/chunks/chunk_001.jpg'}
            MAPI->>MAPI: 负载均衡选择空闲 Worker
            MAPI->>+MW: 分发任务到 Matting Worker 1
            
            %% The worker itself writes the processing and matting_done states around the GPU step
            MW->>DB: UPDATE status='processing', worker_id='matting-1', start_time=now()
            MW->>FS: 读取 /input/chunks/chunk_001.jpg
            MW->>MW: GPU 抠图处理 (使用共享显卡)
            MW->>FS: 写入 /matting_output/chunk_001.png
            MW->>DB: UPDATE status='matting_done', end_time=now()
            MW-->>-MAPI: 返回 {status: 'success', output_path: '/matting_output/chunk_001.png'}
            MAPI-->>-GW: 返回处理成功
            
            %% After the reply the Gateway moves on to the comparison stage for this task
            Note over GW: 立即触发下一阶段
            GW->>GW: 检测到 matting_done
        end
    end

    %% Phase 3: the Gateway actively dispatches Comparison tasks
    rect rgb(80, 40, 0)
        Note over GW,CW: 阶段3: Gateway 调度 Comparison 任务
        %% Tasks whose matting step is finished are picked up and marked as dispatched
        GW->>DB: SELECT task WHERE status='matting_done'
        DB-->>GW: 返回 task_id='chunk_001'
        GW->>DB: UPDATE status='dispatched_comparison'
        
        %% The request carries both paths: the original chunk and its matting output
        GW->>+CAPI: POST /compare<br/>{task_id: 'chunk_001', original: '/input/chunks/chunk_001.jpg', matting: '/matting_output/chunk_001.png'}
        CAPI->>CAPI: 负载均衡选择空闲 Worker
        CAPI->>+CW: 分发任务到 Comparison Worker 1
        
        %% The worker itself writes the comparing and completed states around the GPU step
        CW->>DB: UPDATE status='comparing', worker_id='comparison-1', start_time=now()
        CW->>FS: 读取 /input/chunks/chunk_001.jpg (原图)
        CW->>FS: 读取 /matting_output/chunk_001.png (抠图结果)
        CW->>CW: GPU 对比分析 (使用共享显卡)
        CW->>FS: 写入 /final_results/chunk_001_compare.jpg
        CW->>DB: UPDATE status='completed', end_time=now()
        CW-->>-CAPI: 返回 {status: 'success', result_path: '/final_results/chunk_001_compare.jpg'}
        CAPI-->>-GW: 返回处理成功
    end

    %% Concurrent processing of several tasks
    rect rgb(40, 40, 40)
        Note over GW,CW: 并发处理 (Gateway 继续调度其他任务)
        %% Three parallel branches: two Matting requests (chunk_002, chunk_003)
        %% and one Comparison request (chunk_001)
        par Gateway 同时调度多个任务
            GW->>MAPI: POST /matting {task_id: 'chunk_002'}
            MAPI->>MW: Worker 2 处理
        and
            GW->>MAPI: POST /matting {task_id: 'chunk_003'}
            MAPI->>MW: Worker 3 处理
        and
            GW->>CAPI: POST /compare {task_id: 'chunk_001'}
            CAPI->>CW: Worker 1 处理
        end
    end

    %% Phase 4: progress query and result download
    rect rgb(20, 60, 40)
        Note over User,GW: 阶段4: 进度查询与结果下载
        User->>+GW: GET /status/{job_id}
        %% status is selected next to COUNT(*) so that every count is labelled with its state
        GW->>DB: SELECT status, COUNT(*) FROM tasks WHERE job_id=? GROUP BY status
        %% The sample counts add up to the total of 200 reported to the user (145 of 200 is 72.5 percent)
        DB-->>GW: {pending:48, processing:3, matting_done:2, comparing:2, completed:145}
        GW-->>-User: 返回进度 {total:200, completed:145, progress:72.5%}
        
        %% opt instead of alt: the download only happens when every task is done and has no other branch
        opt 所有任务完成
            User->>+GW: GET /download/{job_id}
            GW->>DB: SELECT * FROM tasks WHERE job_id=? AND status='completed'
            GW->>FS: 打包 /final_results/chunk_*.jpg
            GW-->>-User: 返回压缩包下载链接
        end
    end

    %% Error handling and retry
    rect rgb(80, 20, 20)
        Note over MW,GW: 异常处理与重试
        %% The failing worker records the failed state itself, then the error travels back through the Matting API
        MW->>MW: 处理失败 (OOM 或其他错误)
        MW->>DB: UPDATE status='failed', error='CUDA out of memory'
        MW-->>MAPI: 返回 {status: 'error', message: 'OOM'}
        MAPI-->>GW: 返回错误信息
        
        %% Retry budget of 3: below it the task returns to pending and is dispatched again,
        %% otherwise it is marked permanently_failed and the user is notified
        GW->>DB: SELECT retry_count WHERE task_id='chunk_001'
        alt retry_count < 3
            GW->>DB: UPDATE retry_count=retry_count+1, status='pending'
            GW->>MAPI: 重新调度任务
        else retry_count >= 3
            GW->>DB: UPDATE status='permanently_failed'
            GW->>User: 发送失败通知
        end
    end
```

docker-compose.yml
```yaml
# NOTE(review): Docker Compose v2 treats the top-level `version` key as obsolete and ignores it —
# confirm whether legacy docker-compose v1 still has to be supported before removing it.
version: '3.8'

services:
  # API Gateway service
  gateway:
    container_name: gateway
    image: your-registry/gateway:latest
    restart: unless-stopped
    networks: [app-network]
    ports: ["8000:8000"]
    volumes: ["shared-data:/app/data"]
    # Both API services are started before the gateway
    depends_on: [matting-api, comparison-api]
    environment:
      # The two downstream APIs are addressed by their service names
      MATTING_API_URL: "http://matting-api:8001"
      COMPARISON_API_URL: "http://comparison-api:8002"
      MAX_UPLOAD_SIZE: "107374182400"  # 100 GiB (100 * 1024^3 bytes)

  # Unified Matting API (load-balancing entry point)
  matting-api:
    container_name: matting-api
    image: your-registry/matting-api:latest
    restart: unless-stopped
    networks: [app-network]
    # NOTE(review): this also publishes the API on the host; the gateway addresses it by service name — confirm the host port is needed
    ports: ["8001:8001"]
    volumes: ["shared-data:/app/data"]
    # All three workers are started before this API
    depends_on: [matting-worker-1, matting-worker-2, matting-worker-3]
    environment:
      # Comma-separated list of the Matting worker endpoints
      WORKER_URLS: "http://matting-worker-1:9001,http://matting-worker-2:9001,http://matting-worker-3:9001"

  # Matting Worker 1 (shared GPU)
  matting-worker-1:
    container_name: matting-worker-1
    image: your-registry/matting-worker:latest
    restart: unless-stopped
    networks: [app-network]
    volumes: ["shared-data:/app/data"]
    environment:
      CUDA_VISIBLE_DEVICES: "0"
      WORKER_ID: "1"
      GPU_MEMORY_FRACTION: "0.15"  # limits the fraction of GPU memory used
    # Reserve one NVIDIA GPU device for this container
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu
              count: 1
              driver: nvidia

  # Matting Worker 2 (shared GPU)
  matting-worker-2:
    container_name: matting-worker-2
    image: your-registry/matting-worker:latest
    restart: unless-stopped
    networks: [app-network]
    volumes: ["shared-data:/app/data"]
    environment:
      CUDA_VISIBLE_DEVICES: "0"
      WORKER_ID: "2"
      GPU_MEMORY_FRACTION: "0.15"
    # Reserve one NVIDIA GPU device for this container
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu
              count: 1
              driver: nvidia

  # Matting Worker 3 (shared GPU)
  matting-worker-3:
    container_name: matting-worker-3
    image: your-registry/matting-worker:latest
    restart: unless-stopped
    networks: [app-network]
    volumes: ["shared-data:/app/data"]
    environment:
      CUDA_VISIBLE_DEVICES: "0"
      WORKER_ID: "3"
      GPU_MEMORY_FRACTION: "0.15"
    # Reserve one NVIDIA GPU device for this container
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu
              count: 1
              driver: nvidia

  # Unified Comparison API (load-balancing entry point)
  comparison-api:
    container_name: comparison-api
    image: your-registry/comparison-api:latest
    restart: unless-stopped
    networks: [app-network]
    # NOTE(review): this also publishes the API on the host; the gateway addresses it by service name — confirm the host port is needed
    ports: ["8002:8002"]
    volumes: ["shared-data:/app/data"]
    # Both workers are started before this API
    depends_on: [comparison-worker-1, comparison-worker-2]
    environment:
      # Comma-separated list of the Comparison worker endpoints
      WORKER_URLS: "http://comparison-worker-1:9002,http://comparison-worker-2:9002"

  # Comparison Worker 1 (shared GPU)
  comparison-worker-1:
    container_name: comparison-worker-1
    image: your-registry/comparison-worker:latest
    restart: unless-stopped
    networks: [app-network]
    volumes: ["shared-data:/app/data"]
    environment:
      CUDA_VISIBLE_DEVICES: "0"
      WORKER_ID: "1"
      GPU_MEMORY_FRACTION: "0.15"
    # Reserve one NVIDIA GPU device for this container
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu
              count: 1
              driver: nvidia

  # Comparison Worker 2 (shared GPU)
  comparison-worker-2:
    container_name: comparison-worker-2
    image: your-registry/comparison-worker:latest
    restart: unless-stopped
    networks: [app-network]
    volumes: ["shared-data:/app/data"]
    environment:
      CUDA_VISIBLE_DEVICES: "0"
      WORKER_ID: "2"
      GPU_MEMORY_FRACTION: "0.15"
    # Reserve one NVIDIA GPU device for this container
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu
              count: 1
              driver: nvidia

# Shared data volume mounted by every service at /app/data
volumes:
  shared-data:
    driver: local
    # Bind-mount options passed to the local driver
    # NOTE(review): presumably the host directory must already exist when the stack starts — confirm
    driver_opts:
      device: "/data/app-storage"  # path on the host
      o: "bind"
      type: "none"

# Network configuration: one bridge network joined by every service
networks:
  app-network: {driver: bridge}
```