From 411f9768ff9f9ceb87c0f975502d3ed0c8684905 Mon Sep 17 00:00:00 2001 From: charles Date: Tue, 10 Feb 2026 20:38:46 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20README.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 371 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..448e4b1 --- /dev/null +++ b/README.md @@ -0,0 +1,371 @@ +结构图(如有误请纠正) +```mermaid +graph TD + %% 用户和网关 + User((用户)) -->|"上传数据"| Gateway[API Gateway
入口 + 调度器] + + %% Gateway 调度逻辑 + Gateway -->|"1. 存储原始数据"| Folder1 + Gateway -->|"2. 创建任务记录"| SQLite[(SQLite
任务状态数据库)] + Gateway -->|"3. 数据切片分发"| TaskQueue{任务队列} + + %% 共享存储子图 + subgraph Storage["Docker Shared Volume (/app/data)"] + Folder1["/input
(100GB 原始数据)"] + Folder2["/matting_output
(抠图中间结果)"] + Folder3["/final_results
(对比最终结果)"] + SQLite + end + + %% 任务分发到 Matting + TaskQueue -->|"4. POST /matting {task_id}"| MattingAPI[统一 Matting API] + + %% Matting 服务集群 + MattingAPI -->|"5. 分发任务"| MattingCluster + subgraph MattingCluster["Matting Service 集群
(共享 GPU)"] + M1[Matting Worker 1] + M2[Matting Worker 2] + M3[Matting Worker N] + end + + %% Matting 处理流程 + MattingCluster -->|"6. 读取分片"| Folder1 + MattingCluster -->|"7. 更新状态
(processing)"| SQLite + MattingCluster -->|"8. 写入结果"| Folder2 + MattingCluster -->|"9. 更新状态
(matting_done)"| SQLite + + %% Gateway 轮询调度 + SQLite -.->|"10. 轮询检测
matting_done"| Gateway + Gateway -->|"11. POST /compare {task_id}"| ComparisonAPI[统一 Comparison API] + + %% Comparison 服务集群 + ComparisonAPI -->|"12. 分发任务"| ComparisonCluster + subgraph ComparisonCluster["Comparison Service 集群
(共享 GPU)"] + C1[Comparison Worker 1] + C2[Comparison Worker 2] + C3[Comparison Worker N] + end + + %% Comparison 处理流程 + ComparisonCluster -->|"13. 读取抠图结果"| Folder2 + ComparisonCluster -->|"14. 更新状态
(comparing)"| SQLite + ComparisonCluster -->|"15. 写入对比结果"| Folder3 + ComparisonCluster -->|"16. 更新状态
(completed)"| SQLite + + %% 最终返回 + SQLite -.->|"17. 检测 completed"| Gateway + Gateway -->|"18. 返回最终结果"| User + + %% 样式美化 + style Gateway fill:#bbdefb,stroke:#1565c0,stroke-width:3px + style TaskQueue fill:#fff9c4,stroke:#f57f17,stroke-width:2px + style SQLite fill:#e1bee7,stroke:#7b1fa2,stroke-width:2px + style MattingAPI fill:#e1f5fe,stroke:#01579b,stroke-width:2px + style ComparisonAPI fill:#fff3e0,stroke:#e65100,stroke-width:2px + style MattingCluster fill:#e3f2fd,stroke:#0277bd + style ComparisonCluster fill:#fff8e1,stroke:#f57c00 + style Folder1 fill:#f5f5f5,stroke:#616161 + style Folder2 fill:#f5f5f5,stroke:#616161 + style Folder3 fill:#f5f5f5,stroke:#616161 + style Storage fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px +``` +时序图(如有误请纠正) +```mermaid +sequenceDiagram + autonumber + participant User as 用户 + participant GW as Gateway
(入口+调度) + participant DB as SQLite
(任务数据库) + participant FS as 共享存储
(/app/data) + participant MAPI as Matting API + participant MW as Matting Worker + participant CAPI as Comparison API + participant CW as Comparison Worker + + %% 上传阶段 + rect rgb(220, 240, 255) + Note over User,GW: 阶段1: 数据上传与任务创建 + User->>+GW: POST /upload (100GB 原始数据) + GW->>FS: 保存到 /input/raw_data.zip + GW->>GW: 数据切片 (chunk_001 ~ chunk_N) + GW->>FS: 保存切片到 /input/chunks/ + loop 为每个切片创建任务 + GW->>DB: INSERT task (task_id, status='pending') + end + GW-->>-User: 返回 job_id={uuid} + end + + %% Gateway 主动调度 Matting + rect rgb(225, 245, 254) + Note over GW,MW: 阶段2: Gateway 调度 Matting 任务 + loop 遍历所有待处理任务 + GW->>DB: SELECT task WHERE status='pending' LIMIT 1 + DB-->>GW: 返回 task_id='chunk_001' + GW->>DB: UPDATE status='dispatched_matting' + + GW->>+MAPI: POST /matting
{task_id: 'chunk_001', input_path: '/input/chunks/chunk_001.jpg'} + MAPI->>MAPI: 负载均衡选择空闲 Worker + MAPI->>+MW: 分发任务到 Matting Worker 1 + + MW->>DB: UPDATE status='processing', worker_id='matting-1', start_time=now() + MW->>FS: 读取 /input/chunks/chunk_001.jpg + MW->>MW: GPU 抠图处理 (使用共享显卡) + MW->>FS: 写入 /matting_output/chunk_001.png + MW->>DB: UPDATE status='matting_done', end_time=now() + MW-->>-MAPI: 返回 {status: 'success', output_path: '/matting_output/chunk_001.png'} + MAPI-->>-GW: 返回处理成功 + + Note over GW: 立即触发下一阶段 + GW->>GW: 检测到 matting_done + end + end + + %% Gateway 主动调度 Comparison + rect rgb(255, 243, 224) + Note over GW,CW: 阶段3: Gateway 调度 Comparison 任务 + GW->>DB: SELECT task WHERE status='matting_done' + DB-->>GW: 返回 task_id='chunk_001' + GW->>DB: UPDATE status='dispatched_comparison' + + GW->>+CAPI: POST /compare
{task_id: 'chunk_001', original: '/input/chunks/chunk_001.jpg', matting: '/matting_output/chunk_001.png'} + CAPI->>CAPI: 负载均衡选择空闲 Worker + CAPI->>+CW: 分发任务到 Comparison Worker 1 + + CW->>DB: UPDATE status='comparing', worker_id='comparison-1', start_time=now() + CW->>FS: 读取 /input/chunks/chunk_001.jpg (原图) + CW->>FS: 读取 /matting_output/chunk_001.png (抠图结果) + CW->>CW: GPU 对比分析 (使用共享显卡) + CW->>FS: 写入 /final_results/chunk_001_compare.jpg + CW->>DB: UPDATE status='completed', end_time=now() + CW-->>-CAPI: 返回 {status: 'success', result_path: '/final_results/chunk_001_compare.jpg'} + CAPI-->>-GW: 返回处理成功 + end + + %% 并发处理多个任务 + rect rgb(245, 245, 245) + Note over GW,CW: 并发处理 (Gateway 继续调度其他任务) + par Gateway 同时调度多个任务 + GW->>MAPI: POST /matting {task_id: 'chunk_002'} + MAPI->>MW: Worker 2 处理 + and + GW->>MAPI: POST /matting {task_id: 'chunk_003'} + MAPI->>MW: Worker 3 处理 + and + GW->>CAPI: POST /compare {task_id: 'chunk_001'} + CAPI->>CW: Worker 1 处理 + end + end + + %% 进度查询 + rect rgb(232, 245, 233) + Note over User,GW: 阶段4: 进度查询与结果下载 + User->>+GW: GET /status/{job_id} + GW->>DB: SELECT status, COUNT(*) FROM tasks WHERE job_id=? GROUP BY status + DB-->>GW: {pending:48, processing:3, matting_done:2, comparing:2, completed:145} + GW-->>-User: 返回进度 {total:200, completed:145, progress:72.5%} + + alt 所有任务完成 + User->>+GW: GET /download/{job_id} + GW->>DB: SELECT * FROM tasks WHERE job_id=? 
AND status='completed' + GW->>FS: 打包 /final_results/chunk_*.jpg + GW-->>-User: 返回压缩包下载链接 + end + end + + %% 错误处理 + rect rgb(255, 235, 238) + Note over MW,GW: 异常处理与重试 + MW->>MW: 处理失败 (OOM 或其他错误) + MW->>DB: UPDATE status='failed', error='CUDA out of memory' + MW-->>MAPI: 返回 {status: 'error', message: 'OOM'} + MAPI-->>GW: 返回错误信息 + + GW->>DB: SELECT retry_count WHERE task_id='chunk_001' + alt retry_count < 3 + GW->>DB: UPDATE retry_count=retry_count+1, status='pending' + GW->>MAPI: 重新调度任务 + else retry_count >= 3 + GW->>DB: UPDATE status='permanently_failed' + GW->>User: 发送失败通知 + end + end +``` + +docker-compose.yml +```yaml +version: '3.8' + +services: + # API Gateway 服务 + gateway: + image: your-registry/gateway:latest + container_name: gateway + ports: + - "8000:8000" + volumes: + - shared-data:/app/data + environment: + - MATTING_API_URL=http://matting-api:8001 + - COMPARISON_API_URL=http://comparison-api:8002 + - MAX_UPLOAD_SIZE=107374182400 # 100GB + depends_on: + - matting-api + - comparison-api + restart: unless-stopped + networks: + - app-network + + # Matting 统一 API (负载均衡入口) + matting-api: + image: your-registry/matting-api:latest + container_name: matting-api + ports: + - "8001:8001" + volumes: + - shared-data:/app/data + environment: + - WORKER_URLS=http://matting-worker-1:9001,http://matting-worker-2:9001,http://matting-worker-3:9001 + depends_on: + - matting-worker-1 + - matting-worker-2 + - matting-worker-3 + restart: unless-stopped + networks: + - app-network + + # Matting Worker 1 (共享 GPU) + matting-worker-1: + image: your-registry/matting-worker:latest + container_name: matting-worker-1 + volumes: + - shared-data:/app/data + environment: + - CUDA_VISIBLE_DEVICES=0 + - WORKER_ID=1 + - GPU_MEMORY_FRACTION=0.15 # 限制显存使用比例 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + - app-network + + # Matting Worker 2 (共享 GPU) + matting-worker-2: + image: 
your-registry/matting-worker:latest + container_name: matting-worker-2 + volumes: + - shared-data:/app/data + environment: + - CUDA_VISIBLE_DEVICES=0 + - WORKER_ID=2 + - GPU_MEMORY_FRACTION=0.15 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + - app-network + + # Matting Worker 3 (共享 GPU) + matting-worker-3: + image: your-registry/matting-worker:latest + container_name: matting-worker-3 + volumes: + - shared-data:/app/data + environment: + - CUDA_VISIBLE_DEVICES=0 + - WORKER_ID=3 + - GPU_MEMORY_FRACTION=0.15 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + - app-network + + # Comparison 统一 API (负载均衡入口) + comparison-api: + image: your-registry/comparison-api:latest + container_name: comparison-api + ports: + - "8002:8002" + volumes: + - shared-data:/app/data + environment: + - WORKER_URLS=http://comparison-worker-1:9002,http://comparison-worker-2:9002 + depends_on: + - comparison-worker-1 + - comparison-worker-2 + restart: unless-stopped + networks: + - app-network + + # Comparison Worker 1 (共享 GPU) + comparison-worker-1: + image: your-registry/comparison-worker:latest + container_name: comparison-worker-1 + volumes: + - shared-data:/app/data + environment: + - CUDA_VISIBLE_DEVICES=0 + - WORKER_ID=1 + - GPU_MEMORY_FRACTION=0.15 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + - app-network + + # Comparison Worker 2 (共享 GPU) + comparison-worker-2: + image: your-registry/comparison-worker:latest + container_name: comparison-worker-2 + volumes: + - shared-data:/app/data + environment: + - CUDA_VISIBLE_DEVICES=0 + - WORKER_ID=2 + - GPU_MEMORY_FRACTION=0.15 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + 
- app-network + +# 共享数据卷 +volumes: + shared-data: + driver: local + driver_opts: + type: none + o: bind + device: /data/app-storage # 宿主机路径 + +# 网络配置 +networks: + app-network: + driver: bridge +``` \ No newline at end of file