import sys
import os
import argparse

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
from tqdm import tqdm

# === Configuration ===
# DINOv2 Giant with registers (fixes background artifacts; strongest public variant,
# ~1.1B parameters — assumes ample GPU memory).
MODEL_NAME = 'dinov2_vitg14_reg'
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# ViT-g/14 patch size: every input dimension must be a multiple of this,
# and the patch-token grid is (H // PATCH_SIZE) x (W // PATCH_SIZE).
PATCH_SIZE = 14

# Standard ImageNet normalization expected by DINOv2.
# Built once at module level instead of on every preprocessing call.
_DINO_TRANSFORM = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def init_model():
    """Load the DINOv2 model (local hub cache first, online fallback) and move it to DEVICE.

    Returns:
        The model in eval() mode on DEVICE.
    """
    print(f"🚀 [系统] 初始化 DINOv2 ({MODEL_NAME})...")
    if DEVICE == "cuda":
        print(f"✅ [硬件确认] 正在使用显卡: {torch.cuda.get_device_name(0)}")
        print(f"   (显存状态: {torch.cuda.memory_allocated()/1024**2:.2f}MB 已用)")
    else:
        print("❌ [警告] 未检测到显卡,Giant 模型在 CPU 上会非常慢!")

    # Prefer the local torch.hub cache so repeated runs don't re-download code.
    local_path = '/root/.cache/torch/hub/facebookresearch_dinov2_main'
    print(f"📂 [系统] 正在从本地缓存加载代码: {local_path}")
    if os.path.exists(local_path):
        model = torch.hub.load(local_path, MODEL_NAME, source='local')
    else:
        # Fall back to online loading if the cache path is missing
        # (likely to fail in offline environments).
        print("⚠️ 本地缓存未找到,尝试在线加载...")
        model = torch.hub.load('facebookresearch/dinov2', MODEL_NAME)

    model.to(DEVICE)
    model.eval()
    return model


def preprocess_for_dino(img_cv):
    """Preprocess a BGR OpenCV image for DINOv2.

    1. Resize so both dimensions are multiples of PATCH_SIZE (required by ViT patching).
    2. Convert BGR -> RGB and apply standard ImageNet normalization.

    Args:
        img_cv: HxWx3 BGR uint8 image (as returned by cv2.imread).

    Returns:
        (tensor, new_h, new_w): a [1, 3, new_h, new_w] float tensor on DEVICE,
        plus the aligned height/width actually used.
    """
    h, w = img_cv.shape[:2]
    # Floor to a patch multiple, but never below one patch — a sub-14px input
    # would otherwise floor to 0 and crash cv2.resize.
    new_h = max(PATCH_SIZE, (h // PATCH_SIZE) * PATCH_SIZE)
    new_w = max(PATCH_SIZE, (w // PATCH_SIZE) * PATCH_SIZE)

    img_resized = cv2.resize(img_cv, (new_w, new_h))
    img_pil = Image.fromarray(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))
    return _DINO_TRANSFORM(img_pil).unsqueeze(0).to(DEVICE), new_h, new_w


def _compute_diff_heatmap(model, img1_cv, img2_cv, w_orig, h_orig):
    """Run DINOv2 on both images and return a per-pixel difference heatmap.

    Computes cosine similarity between corresponding patch tokens, converts it
    to a difference score (1 - similarity), and upsamples the patch grid back
    to the original image size.

    Returns:
        float32 ndarray of shape (h_orig, w_orig), higher = more different.
    """
    t1_tensor, h_align, w_align = preprocess_for_dino(img1_cv)
    t2_tensor, _, _ = preprocess_for_dino(img2_cv)
    grid_h, grid_w = h_align // PATCH_SIZE, w_align // PATCH_SIZE

    print(f"🧠 [推理] Giant Model 计算中 (Patch网格: {grid_h}x{grid_w})...")
    with torch.no_grad():
        # Extract per-patch tokens; shape [1, N_patches, 1536] for the Giant model.
        feat1 = model.forward_features(t1_tensor)["x_norm_patchtokens"]
        feat2 = model.forward_features(t2_tensor)["x_norm_patchtokens"]
        # Cosine similarity per patch -> [1, N_patches].
        similarity = F.cosine_similarity(feat1, feat2, dim=-1)

    # Reshape the flat patch sequence back into its 2D grid.
    sim_map = similarity.reshape(grid_h, grid_w).cpu().numpy()
    # Similarity -> difference (diff = 1 - sim).
    heatmap_raw = 1.0 - sim_map
    # Upsample the coarse patch grid to the original resolution for overlaying.
    return cv2.resize(heatmap_raw, (w_orig, h_orig), interpolation=cv2.INTER_CUBIC)


def _draw_detections(result_img, contours, heatmap_avg, min_area):
    """Draw bounding boxes + mean-difference labels for large-enough contours.

    Mutates result_img in place.

    Returns:
        Number of boxes drawn.
    """
    box_count = 0
    for cnt in contours:
        if cv2.contourArea(cnt) <= min_area:
            continue
        box_count += 1
        x, y, bw, bh = cv2.boundingRect(cnt)

        # Box: thick white outline under a thin red one for visibility.
        cv2.rectangle(result_img, (x, y), (x + bw, y + bh), (255, 255, 255), 4)
        cv2.rectangle(result_img, (x, y), (x + bw, y + bh), (0, 0, 255), 2)

        # Mean difference inside the box, shown as the label.
        region_score = heatmap_avg[y:y + bh, x:x + bw].mean()
        label = f"{region_score:.2f}"

        # Label background + text; clamp to the top edge so boxes near y=0
        # don't draw the label off-image.
        label_top = max(0, y - 25)
        cv2.rectangle(result_img, (x, label_top), (x + 80, y), (0, 0, 255), -1)
        cv2.putText(result_img, label, (x + 5, y - 7),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    return box_count


def scan_and_draw(model, t1_path, t2_path, output_path, threshold):
    """Compare two images with DINOv2 and render a change heatmap + boxes.

    Args:
        model: DINOv2 model from init_model().
        t1_path: baseline image path.
        t2_path: current-state image path (defines the output resolution).
        output_path: where the annotated result image is written.
        threshold: detection threshold in [0, 1] applied to the normalized heatmap.

    Side effects:
        Writes "debug_raw_heatmap.png" and output_path; prints progress/stats.
    """
    # 1. Load both images; T1 is resized to match T2 (current state is the reference).
    img1_cv = cv2.imread(t1_path)
    img2_cv = cv2.imread(t2_path)
    if img1_cv is None or img2_cv is None:
        print("❌ 错误: 无法读取图片")
        return

    h_orig, w_orig = img2_cv.shape[:2]
    img1_cv = cv2.resize(img1_cv, (w_orig, h_orig))
    print(f"🔪 [处理] DINOv2 扫描... 原始尺寸: {w_orig}x{h_orig}")

    # 2. Whole-image DINOv2 inference (no sliding-window tiling needed) and
    #    per-pixel difference heatmap.
    heatmap_avg = _compute_diff_heatmap(model, img1_cv, img2_cv, w_orig, h_orig)

    # 3. Statistics over the difference map.
    min_v, max_v = heatmap_avg.min(), heatmap_avg.max()
    print(f"\n📊 [统计] 差异分布: Min={min_v:.4f} | Max={max_v:.4f} | Mean={heatmap_avg.mean():.4f}")

    # 4. Save the raw (min-max normalized) grayscale heatmap for debugging.
    raw_norm = (heatmap_avg - min_v) / (max_v - min_v + 1e-6)
    cv2.imwrite("debug_raw_heatmap.png", (raw_norm * 255).astype(np.uint8))
    print(f"💾 [调试] 原始热力图已保存: debug_raw_heatmap.png")

    # 5. Visualization: dynamic stretch with a floor on the denominator so a
    #    near-uniform (low-difference) map doesn't amplify noise.
    norm_factor = max(max_v, 0.4)
    heatmap_vis = (heatmap_avg / norm_factor * 255).clip(0, 255).astype(np.uint8)
    heatmap_color = cv2.applyColorMap(heatmap_vis, cv2.COLORMAP_JET)

    # Blend the colorized heatmap over the current-state image.
    alpha = 0.4
    blended_img = cv2.addWeighted(img2_cv, alpha, heatmap_color, 1.0 - alpha, 0)

    # 6. Threshold + contour detection on the visualization map.
    _, thresh_img = cv2.threshold(heatmap_vis, int(255 * threshold), 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    result_img = blended_img.copy()
    # Minimum region size as a fraction of the image (0.5%) — DINO needs no
    # patch-size-derived area here.
    min_area = (w_orig * h_orig) * 0.005
    box_count = _draw_detections(result_img, contours, heatmap_avg, min_area)

    # 7. Save the annotated result.
    cv2.imwrite(output_path, result_img)
    print("=" * 40)
    print(f"🎯 扫描完成! 发现区域: {box_count} 个")
    print(f"🖼️ 结果已保存至: {output_path}")
    print("=" * 40)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="DINOv2 Giant 违建热力图检测 (结构敏感版)")
    parser.add_argument("t1", help="基准图")
    parser.add_argument("t2", help="现状图")
    parser.add_argument("out", nargs="?", default="dino_result.jpg", help="输出图片名")

    # Kept for CLI compatibility with the previous sliding-window tool;
    # DINOv2 runs on the whole image, so these are ignored.
    parser.add_argument("-c", "--crop", type=int, default=224, help="(已忽略) DINOv2 全图推理")
    parser.add_argument("-s", "--step", type=int, default=0, help="(已忽略) DINOv2 全图推理")
    parser.add_argument("-b", "--batch", type=int, default=16, help="(已忽略) DINOv2 全图推理")

    # DINO cosine differences are typically smaller than DreamSim's;
    # a lower threshold (~0.25-0.35) is recommended.
    parser.add_argument("--thresh", type=float, default=0.30, help="检测阈值 (0.0-1.0)")

    args = parser.parse_args()

    model = init_model()
    scan_and_draw(model, args.t1, args.t2, args.out, args.thresh)