# Source listing header (from the file viewer, not part of the program): 193 lines, 7.6 KiB, Python
import sys
|
||
import os
|
||
import torch
|
||
import torch.nn.functional as F
|
||
import cv2
|
||
import numpy as np
|
||
import argparse
|
||
from PIL import Image
|
||
from torchvision import transforms
|
||
from tqdm import tqdm
|
||
|
||
# === Configuration ===
# Torch Hub entry point: DINOv2 Giant with register tokens. The register
# variant fixes background artifacts and is the strongest model of the family.
# GPU memory is not treated as a constraint here, so the ~1.1B-parameter
# model is used directly.
MODEL_NAME = 'dinov2_vitg14_reg'

# Run on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
|
||
|
||
def init_model(local_path=None):
    """Load the DINOv2 model named by ``MODEL_NAME`` and prepare it for inference.

    Prints a short hardware report, loads the model code from a local Torch
    Hub checkout when one exists (falling back to an online load otherwise),
    moves the model to ``DEVICE`` and switches it to eval mode.

    Args:
        local_path: Directory holding a local ``facebookresearch/dinov2``
            checkout. When ``None`` (the default), the historical hard-coded
            location under ``/root/.cache`` is used if it exists; otherwise
            the checkout is looked up under ``torch.hub.get_dir()`` so that
            non-root users and custom ``TORCH_HOME`` setups also hit the
            local cache instead of going online.

    Returns:
        The loaded model, placed on ``DEVICE`` and in eval mode.
    """
    print(f"🚀 [系统] 初始化 DINOv2 ({MODEL_NAME})...")
    if DEVICE == "cuda":
        print(f"✅ [硬件确认] 正在使用显卡: {torch.cuda.get_device_name(0)}")
        print(f" (显存状态: {torch.cuda.memory_allocated()/1024**2:.2f}MB 已用)")
    else:
        print("❌ [警告] 未检测到显卡,Giant 模型在 CPU 上会非常慢!")

    if local_path is None:
        repo_dir_name = 'facebookresearch_dinov2_main'
        # Historical default (root user, default cache location). It is tried
        # first so existing deployments keep loading from the same place.
        local_path = os.path.join('/root/.cache/torch/hub', repo_dir_name)
        if not os.path.exists(local_path):
            # Same checkout, but under whatever hub directory torch is
            # actually configured to use for the current user.
            local_path = os.path.join(torch.hub.get_dir(), repo_dir_name)

    print(f"📂 [系统] 正在从本地缓存加载代码: {local_path}")
    if os.path.exists(local_path):
        model = torch.hub.load(local_path, MODEL_NAME, source='local')
    else:
        # No local checkout found: fall back to an online load. The original
        # author notes this will most likely fail in the target environment.
        print("⚠️ 本地缓存未找到,尝试在线加载...")
        model = torch.hub.load('facebookresearch/dinov2', MODEL_NAME)

    model.to(DEVICE)
    model.eval()

    return model
|
||
|
||
def preprocess_for_dino(img_cv, patch_size=14):
    """Turn an OpenCV BGR image into a normalized DINOv2 input batch.

    DINOv2-specific preprocessing:
      1. Both sides are resized to a multiple of ``patch_size`` (rounded down).
      2. The image is converted BGR -> RGB, turned into a tensor and
         normalized with the standard ImageNet mean/std.

    Args:
        img_cv: Image array whose first two dimensions are (height, width),
            e.g. the result of ``cv2.imread``.
        patch_size: Side length of one model patch in pixels. Defaults to 14.

    Returns:
        A tuple ``(tensor, new_h, new_w)``: the normalized tensor with a
        leading batch dimension of 1, already moved to ``DEVICE``, and the
        height/width the image was resized to.
    """
    h, w = img_cv.shape[:2]

    # Round each side down to a multiple of the patch size, but never below a
    # single patch: an image smaller than one patch would otherwise yield a
    # 0-pixel side and make cv2.resize fail.
    new_h = max(patch_size, (h // patch_size) * patch_size)
    new_w = max(patch_size, (w // patch_size) * patch_size)

    # cv2.resize takes the target size as (width, height).
    img_resized = cv2.resize(img_cv, (new_w, new_h))
    img_pil = Image.fromarray(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    return transform(img_pil).unsqueeze(0).to(DEVICE), new_h, new_w
|
||
|
||
def scan_and_draw(model, t1_path, t2_path, output_path, threshold):
    """Compare two images with DINOv2 patch features and draw the changed regions.

    Both images are read with OpenCV, the baseline (T1) is resized to the
    size of the current image (T2), and per-patch cosine similarity between
    the two sets of patch tokens is turned into a difference heatmap
    (``1 - similarity``). The heatmap is blended over T2, thresholded, and
    every sufficiently large connected region gets a bounding box labelled
    with its mean difference score.

    Side effects: writes ``debug_raw_heatmap.png`` (min-max normalized
    grayscale heatmap) into the current directory and the annotated image to
    ``output_path``; prints progress and statistics to stdout.

    Args:
        model: Model exposing ``forward_features`` that returns a mapping
            with an ``"x_norm_patchtokens"`` entry.
        t1_path: Path of the baseline image.
        t2_path: Path of the current image.
        output_path: Where the annotated result image is written.
        threshold: Fraction (0.0-1.0) of the 8-bit visualization range above
            which a pixel counts as changed.

    Returns:
        None. Returns early (after printing an error) when an input image
        cannot be read or the result image cannot be written.
    """
    # 1. Read both images with OpenCV.
    img1_cv = cv2.imread(t1_path)
    img2_cv = cv2.imread(t2_path)

    if img1_cv is None or img2_cv is None:
        print("❌ 错误: 无法读取图片")
        return

    # Force both images onto the same pixel grid, with the current image (T2)
    # as the reference size.
    h_orig, w_orig = img2_cv.shape[:2]
    img1_cv = cv2.resize(img1_cv, (w_orig, h_orig))

    print(f"🔪 [处理] DINOv2 扫描... 原始尺寸: {w_orig}x{h_orig}")

    # 2. Preprocess. DINO takes the whole image at once (no sliding-window
    # tiling), but the size has to be adjusted to a multiple of the patch size.
    t1_tensor, h_align, w_align = preprocess_for_dino(img1_cv)
    t2_tensor, _, _ = preprocess_for_dino(img2_cv)

    print(f"🧠 [推理] Giant Model 计算中 (Patch网格: {h_align//14}x{w_align//14})...")

    with torch.no_grad():
        # Forward pass extracting the patch tokens. Per the original author's
        # note the shape is [1, N_patches, 1536] for the Giant model.
        feat1 = model.forward_features(t1_tensor)["x_norm_patchtokens"]
        feat2 = model.forward_features(t2_tensor)["x_norm_patchtokens"]

        # Per-patch cosine similarity over the feature dimension.
        similarity = F.cosine_similarity(feat1, feat2, dim=-1)

    # 3. Build the heatmap: reshape the flat patch list back into a 2-D grid.
    grid_h, grid_w = h_align // 14, w_align // 14
    sim_map = similarity.reshape(grid_h, grid_w).cpu().numpy()

    # Similarity -> difference.
    heatmap_raw = 1.0 - sim_map

    # Upscale the patch grid to the original image size so it can be overlaid.
    heatmap_avg = cv2.resize(heatmap_raw, (w_orig, h_orig), interpolation=cv2.INTER_CUBIC)

    min_v, max_v = heatmap_avg.min(), heatmap_avg.max()
    print(f"\n📊 [统计] 差异分布: Min={min_v:.4f} | Max={max_v:.4f} | Mean={heatmap_avg.mean():.4f}")

    # Save the raw heatmap, min-max stretched to 8-bit grayscale, for debugging.
    # The epsilon keeps the division defined when the map is constant.
    raw_norm = (heatmap_avg - min_v) / (max_v - min_v + 1e-6)
    if cv2.imwrite("debug_raw_heatmap.png", (raw_norm * 255).astype(np.uint8)):
        print(f"💾 [调试] 原始热力图已保存: debug_raw_heatmap.png")
    else:
        # imwrite reports failure through its return value; a missing debug
        # image is not fatal, so only warn.
        print("⚠️ [调试] 原始热力图保存失败: debug_raw_heatmap.png")

    # 4. Visualization. Stretch dynamically for display, but never divide by
    # less than 0.4 so that tiny differences are not amplified into noise.
    norm_factor = max(max_v, 0.4)
    heatmap_vis = (heatmap_avg / norm_factor * 255).clip(0, 255).astype(np.uint8)

    heatmap_color = cv2.applyColorMap(heatmap_vis, cv2.COLORMAP_JET)

    # Blend: alpha weights the current image, the rest goes to the heatmap.
    alpha = 0.4
    blended_img = cv2.addWeighted(img2_cv, alpha, heatmap_color, 1.0 - alpha, 0)

    # Threshold the 8-bit heatmap and extract the outer contours of hot areas.
    _, thresh_img = cv2.threshold(heatmap_vis, int(255 * threshold), 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    result_img = blended_img.copy()
    box_count = 0

    # Ignore regions smaller than 0.5% of the image area.
    min_area = (w_orig * h_orig) * 0.005

    for cnt in contours:
        if cv2.contourArea(cnt) > min_area:
            box_count += 1
            x, y, bw, bh = cv2.boundingRect(cnt)
            # Score shown on the box: mean difference inside the bounding rect.
            region_score = heatmap_avg[y:y+bh, x:x+bw].mean()
            _draw_scored_box(result_img, x, y, bw, bh, region_score)

    # Save the final result; imwrite returns False when the file could not be
    # written, so do not claim success in that case.
    if not cv2.imwrite(output_path, result_img):
        print(f"❌ 错误: 无法保存结果图片: {output_path}")
        return

    print("="*40)
    print(f"🎯 扫描完成! 发现区域: {box_count} 个")
    print(f"🖼️ 结果已保存至: {output_path}")
    print("="*40)


def _draw_scored_box(canvas, x, y, bw, bh, score):
    """Draw one bounding box and its two-decimal score tag onto ``canvas`` in place."""
    # Thick white outline under a thin red one (colors are BGR).
    cv2.rectangle(canvas, (x, y), (x + bw, y + bh), (255, 255, 255), 4)
    cv2.rectangle(canvas, (x, y), (x + bw, y + bh), (0, 0, 255), 2)

    # The tag normally sits directly above the box. When the box touches the
    # top of the image there is no room there and the tag would be drawn
    # off-canvas, so it is placed just inside the box instead.
    tag_h = 25
    tag_top = y - tag_h if y >= tag_h else y
    cv2.rectangle(canvas, (x, tag_top), (x + 80, tag_top + tag_h), (0, 0, 255), -1)
    cv2.putText(canvas, f"{score:.2f}", (x + 5, tag_top + tag_h - 7),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
||
|
||
def threshold_value(text):
    """argparse ``type=`` converter: parse ``text`` as a float within [0.0, 1.0].

    Raises:
        argparse.ArgumentTypeError: if ``text`` is not a number or lies
            outside the documented 0.0-1.0 range (NaN is rejected too).
    """
    try:
        value = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {text!r}") from None
    if not 0.0 <= value <= 1.0:
        raise argparse.ArgumentTypeError(f"must be between 0.0 and 1.0, got {text!r}")
    return value


def parse_cli_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when ``argv`` is None).

    Returns:
        The ``argparse.Namespace`` with ``t1``, ``t2``, ``out``, ``crop``,
        ``step``, ``batch`` and ``thresh``.
    """
    parser = argparse.ArgumentParser(description="DINOv2 Giant 违建热力图检测 (结构敏感版)")
    parser.add_argument("t1", help="基准图")
    parser.add_argument("t2", help="现状图")
    parser.add_argument("out", nargs="?", default="dino_result.jpg", help="输出图片名")

    # crop/step/batch are accepted only for command-line compatibility with
    # the earlier tool; DINOv2 runs on the whole image and ignores them.
    parser.add_argument("-c", "--crop", type=int, default=224, help="(已忽略) DINOv2 全图推理")
    parser.add_argument("-s", "--step", type=int, default=0, help="(已忽略) DINOv2 全图推理")
    parser.add_argument("-b", "--batch", type=int, default=16, help="(已忽略) DINOv2 全图推理")

    # Core parameter. DINO cosine differences are usually smaller than
    # DreamSim's, so a lower threshold (about 0.25-0.35) is recommended.
    # Out-of-range values are rejected up front: above 1.0 nothing could ever
    # be detected, below 0.0 the whole image would be flagged.
    parser.add_argument("--thresh", type=threshold_value, default=0.30, help="检测阈值 (0.0-1.0)")

    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_cli_args()

    # Initialize the model and run the scan.
    model = init_model()
    scan_and_draw(model, args.t1, args.t2, args.out, args.thresh)
|