结构图(如有误请纠正)
```mermaid
graph TD
%% User and gateway
%% Multi-line labels are written with <br/>; a raw newline inside a label is not a portable line break.
User((用户)) -->|"上传数据"| Gateway["API Gateway<br/>入口 + 调度器"]
%% Gateway scheduling logic
Gateway -->|"1. 存储原始数据"| Folder1
Gateway -->|"2. 创建任务记录"| SQLite[("SQLite<br/>任务状态数据库")]
%% Shared storage subgraph: the three data folders and the SQLite file live on the same volume
subgraph Storage["Docker Shared Volume (/app/data)"]
    Folder1["/input<br/>(100G 原始数据)"]
    Folder2["/matting_output<br/>(抠图中间结果)"]
    Folder3["/final_results<br/>(对比最终结果)"]
    SQLite
end
%% Dispatch tasks to Matting
%% NOTE(review): edge numbering jumps from 2 to 4 - confirm whether a step 3 was dropped.
Gateway -->|"4. POST /matting {task_id}"| MattingAPI[统一 Matting API]
%% Matting service cluster
MattingAPI -->|"5. 分发任务"| MattingCluster
subgraph MattingCluster["Matting Service 集群<br/>(共享 GPU)"]
    M1[Matting Worker 1]
    M2[Matting Worker 2]
    M3[Matting Worker N]
end
%% Matting processing flow (labels with a second line use <br/> so each edge stays on one source line)
MattingCluster -->|"6. 读取分片"| Folder1
MattingCluster -->|"7. 更新状态<br/>(processing)"| SQLite
MattingCluster -->|"8. 写入结果"| Folder2
MattingCluster -->|"9. 更新状态<br/>(matting_done)"| SQLite
%% Gateway polling (disabled): in this diagram the Matting workers call the Comparison API themselves (step 11).
%% The disabled edge is kept on ONE comment line because a leading %% comments out a single line only.
%% SQLite -.->|"10. 轮询检测<br/>matting_done"| Gateway
%% NOTE(review): the sequence diagram in this file has the Gateway send POST /compare instead - confirm which flow is intended.
%% The ComparisonAPI label is declared once; later edges refer to the node by id.
M1 -->|"11. POST /compare {task_id}"| ComparisonAPI[统一 Comparison API]
M2 -->|"11. POST /compare {task_id}"| ComparisonAPI
M3 -->|"11. POST /compare {task_id}"| ComparisonAPI
%% Comparison service cluster
ComparisonAPI -->|"12. 分发任务"| ComparisonCluster
subgraph ComparisonCluster["Comparison Service 集群<br/>(共享 GPU)"]
    C1[Comparison Worker 1]
    C2[Comparison Worker 2]
    C3[Comparison Worker N]
end
%% Comparison processing flow (labels with a second line use <br/> so each edge stays on one source line)
ComparisonCluster -->|"13. 读取抠图结果"| Folder2
ComparisonCluster -->|"14. 更新状态<br/>(comparing)"| SQLite
ComparisonCluster -->|"15. 写入对比结果"| Folder3
ComparisonCluster -->|"16. 更新状态<br/>(completed)"| SQLite
%% Final response: the gateway notices the completed status and answers the user
SQLite -.->|"17. 检测 completed"| Gateway
Gateway -->|"18. 返回最终结果"| User
%% Styling tuned for a dark background
%% Nodes that share one palette are grouped under a classDef; one-off nodes and subgraphs keep a style line.
classDef mattingWorker fill:#0288d1,stroke:#4fc3f7,color:#fff
classDef comparisonWorker fill:#e64a19,stroke:#ff7043,color:#fff
classDef storageFolder fill:#424242,stroke:#9e9e9e,stroke-width:2px,color:#e0e0e0
class M1,M2,M3 mattingWorker
class C1,C2,C3 comparisonWorker
class Folder1,Folder2,Folder3 storageFolder
%% Entry points and database
style User fill:#5e35b1,stroke:#9575cd,stroke-width:2px,color:#fff
style Gateway fill:#1e88e5,stroke:#64b5f6,stroke-width:3px,color:#fff
style SQLite fill:#ab47bc,stroke:#ce93d8,stroke-width:2px,color:#fff
style MattingAPI fill:#26c6da,stroke:#4dd0e1,stroke-width:2px,color:#000
style ComparisonAPI fill:#ff7043,stroke:#ff8a65,stroke-width:2px,color:#fff
%% Subgraph containers
style MattingCluster fill:#0277bd,stroke:#0288d1,color:#fff
style ComparisonCluster fill:#d84315,stroke:#e64a19,color:#fff
style Storage fill:#1b5e20,stroke:#4caf50,stroke-width:2px,color:#fff
```
序列图(如有误请纠正)
```mermaid
%%{init: {'theme':'dark'}}%%
sequenceDiagram
autonumber
%% Each participant statement must fit on one source line; a second label line is added with <br/>.
participant User as 用户
participant GW as Gateway<br/>(入口+调度)
participant DB as SQLite<br/>(任务数据库)
participant FS as 共享存储<br/>(/app/data)
participant MAPI as Matting API
participant MW as Matting Worker
participant CAPI as Comparison API
participant CW as Comparison Worker
%% Upload phase: store the raw upload, slice it into chunks, create one task row per chunk
rect rgb(30, 60, 90)
    Note over User,GW: 阶段1: 数据上传与任务创建
    User->>+GW: POST /upload (100GB 原始数据)
    GW->>FS: 保存到 /input/raw_data.zip
    GW->>GW: 数据切片 (chunk_001 ~ chunk_N)
    GW->>FS: 保存切片到 /input/chunks/
    %% Each task row carries job_id because the status and download queries later filter on it.
    loop 为每个切片创建任务
        GW->>DB: INSERT task (task_id, job_id, status='pending')
    end
    GW-->>-User: 返回 job_id={uuid}
end
%% Gateway actively schedules Matting
rect rgb(0, 60, 80)
    Note over GW,MW: 阶段2: Gateway 调度 Matting 任务
    loop 遍历所有待处理任务
        GW->>DB: SELECT task WHERE status='pending' LIMIT 1
        DB-->>GW: 返回 task_id='chunk_001'
        GW->>DB: UPDATE status='dispatched_matting'
        %% The request payload is the second label line, joined with <br/> because a message must fit on one source line.
        GW->>+MAPI: POST /matting<br/>{task_id: 'chunk_001', input_path: '/input/chunks/chunk_001.jpg'}
        MAPI->>MAPI: 负载均衡选择空闲 Worker
        MAPI->>+MW: 分发任务到 Matting Worker 1
        MW->>DB: UPDATE status='processing', worker_id='matting-1', start_time=now()
        MW->>FS: 读取 /input/chunks/chunk_001.jpg
        MW->>MW: GPU 抠图处理 (使用共享显卡)
        MW->>FS: 写入 /matting_output/chunk_001.png
        MW->>DB: UPDATE status='matting_done', end_time=now()
        MW-->>-MAPI: 返回 {status: 'success', output_path: '/matting_output/chunk_001.png'}
        MAPI-->>-GW: 返回处理成功
        Note over GW: 立即触发下一阶段
        GW->>GW: 检测到 matting_done
    end
end
%% Gateway actively schedules Comparison
rect rgb(80, 40, 0)
    Note over GW,CW: 阶段3: Gateway 调度 Comparison 任务
    GW->>DB: SELECT task WHERE status='matting_done'
    DB-->>GW: 返回 task_id='chunk_001'
    GW->>DB: UPDATE status='dispatched_comparison'
    %% The request payload is the second label line, joined with <br/> because a message must fit on one source line.
    GW->>+CAPI: POST /compare<br/>{task_id: 'chunk_001', original: '/input/chunks/chunk_001.jpg', matting: '/matting_output/chunk_001.png'}
    CAPI->>CAPI: 负载均衡选择空闲 Worker
    CAPI->>+CW: 分发任务到 Comparison Worker 1
    CW->>DB: UPDATE status='comparing', worker_id='comparison-1', start_time=now()
    CW->>FS: 读取 /input/chunks/chunk_001.jpg (原图)
    CW->>FS: 读取 /matting_output/chunk_001.png (抠图结果)
    CW->>CW: GPU 对比分析 (使用共享显卡)
    CW->>FS: 写入 /final_results/chunk_001_compare.jpg
    CW->>DB: UPDATE status='completed', end_time=now()
    CW-->>-CAPI: 返回 {status: 'success', result_path: '/final_results/chunk_001_compare.jpg'}
    CAPI-->>-GW: 返回处理成功
end
%% Concurrent handling: three dispatches shown as parallel branches (two matting chunks, one comparison)
rect rgb(40, 40, 40)
    Note over GW,CW: 并发处理 (Gateway 继续调度其他任务)
    par Gateway 同时调度多个任务
        GW->>MAPI: POST /matting {task_id: 'chunk_002'}
        MAPI->>MW: Worker 2 处理
    and
        GW->>MAPI: POST /matting {task_id: 'chunk_003'}
        MAPI->>MW: Worker 3 处理
    and
        GW->>CAPI: POST /compare {task_id: 'chunk_001'}
        CAPI->>CW: Worker 1 处理
    end
end
%% Progress query and result download
rect rgb(20, 60, 40)
    Note over User,GW: 阶段4: 进度查询与结果下载
    User->>+GW: GET /status/{job_id}
    %% status is selected alongside the count so each count can be attributed to its status.
    GW->>DB: SELECT status, COUNT(*) FROM tasks WHERE job_id=? GROUP BY status
    %% Example counts add up to the reported total: 48 + 3 + 2 + 2 + 145 = 200.
    DB-->>GW: {pending:48, processing:3, matting_done:2, comparing:2, completed:145}
    GW-->>-User: 返回进度 {total:200, completed:145, progress:72.5%}
    %% Single-branch condition, so opt (optional) is used rather than alt.
    opt 所有任务完成
        User->>+GW: GET /download/{job_id}
        GW->>DB: SELECT * FROM tasks WHERE job_id=? AND status='completed'
        GW->>FS: 打包 /final_results/chunk_*.jpg
        GW-->>-User: 返回压缩包下载链接
    end
end
%% Error handling: a failed matting task is retried until retry_count reaches 3, then marked permanently failed
rect rgb(80, 20, 20)
    Note over MW,GW: 异常处理与重试
    MW->>MW: 处理失败 (OOM 或其他错误)
    MW->>DB: UPDATE status='failed', error='CUDA out of memory'
    MW-->>MAPI: 返回 {status: 'error', message: 'OOM'}
    MAPI-->>GW: 返回错误信息
    GW->>DB: SELECT retry_count WHERE task_id='chunk_001'
    alt retry_count < 3
        GW->>DB: UPDATE retry_count=retry_count+1, status='pending'
        GW->>MAPI: 重新调度任务
    else retry_count >= 3
        GW->>DB: UPDATE status='permanently_failed'
        GW->>User: 发送失败通知
    end
end
```
docker-compose.yml
```yaml
version: '3.8'

services:
  # API Gateway service: the only entry point; forwards work to the two
  # internal APIs whose URLs are passed in through the environment.
  gateway:
    image: your-registry/gateway:latest
    container_name: gateway
    ports:
      - "8000:8000"
    volumes:
      - shared-data:/app/data
    environment:
      - MATTING_API_URL=http://matting-api:8001
      - COMPARISON_API_URL=http://comparison-api:8002
      - MAX_UPLOAD_SIZE=107374182400  # 100 GiB in bytes (100 * 1024^3)
    # Start order only; depends_on does not wait for the APIs to be ready.
    depends_on:
      - matting-api
      - comparison-api
    restart: unless-stopped
    networks:
      - app-network
# Matting unified API (load-balancing entry point)
  matting-api:
    image: your-registry/matting-api:latest
    container_name: matting-api
    ports:
      - "8001:8001"
    volumes:
      - shared-data:/app/data
    environment:
      # Comma-separated worker endpoints, addressed by Compose service name.
      - WORKER_URLS=http://matting-worker-1:9001,http://matting-worker-2:9001,http://matting-worker-3:9001
    depends_on:
      - matting-worker-1
      - matting-worker-2
      - matting-worker-3
    restart: unless-stopped
    networks:
      - app-network
# Matting Worker 1 (shared GPU)
  matting-worker-1:
    image: your-registry/matting-worker:latest
    container_name: matting-worker-1
    volumes:
      - shared-data:/app/data
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - WORKER_ID=1
      - GPU_MEMORY_FRACTION=0.15  # limit on the share of GPU memory used
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - app-network
# Matting Worker 2 (shared GPU)
  matting-worker-2:
    image: your-registry/matting-worker:latest
    container_name: matting-worker-2
    volumes:
      - shared-data:/app/data
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - WORKER_ID=2
      - GPU_MEMORY_FRACTION=0.15
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - app-network
# Matting Worker 3 (shared GPU)
  matting-worker-3:
    image: your-registry/matting-worker:latest
    container_name: matting-worker-3
    volumes:
      - shared-data:/app/data
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - WORKER_ID=3
      - GPU_MEMORY_FRACTION=0.15
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - app-network
# Comparison unified API (load-balancing entry point)
  comparison-api:
    image: your-registry/comparison-api:latest
    container_name: comparison-api
    ports:
      - "8002:8002"
    volumes:
      - shared-data:/app/data
    environment:
      # Comma-separated worker endpoints, addressed by Compose service name.
      - WORKER_URLS=http://comparison-worker-1:9002,http://comparison-worker-2:9002
    depends_on:
      - comparison-worker-1
      - comparison-worker-2
    restart: unless-stopped
    networks:
      - app-network
# Comparison Worker 1 (shared GPU)
  comparison-worker-1:
    image: your-registry/comparison-worker:latest
    container_name: comparison-worker-1
    volumes:
      - shared-data:/app/data
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - WORKER_ID=1
      - GPU_MEMORY_FRACTION=0.15
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - app-network
# Comparison Worker 2 (shared GPU)
  comparison-worker-2:
    image: your-registry/comparison-worker:latest
    container_name: comparison-worker-2
    volumes:
      - shared-data:/app/data
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - WORKER_ID=2
      - GPU_MEMORY_FRACTION=0.15
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - app-network
# Shared data volume: a local-driver volume bind-mounted from a host directory
volumes:
  shared-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /data/app-storage  # host path
# Network configuration: one bridge network joined by every service
networks:
  app-network:
    driver: bridge
```