puruan/main_dinov2.py
2026-02-04 09:54:24 +08:00

193 lines
7.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
import os
import torch
import torch.nn.functional as F
import cv2
import numpy as np
import argparse
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
# === Configuration ===
# DINOv2 Giant with registers ('reg' variant fixes background token artifacts).
# ~1.1B parameters — chosen on the assumption that GPU memory is not a concern.
MODEL_NAME = 'dinov2_vitg14_reg'
# Prefer CUDA when available; the Giant model is very slow on CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
def init_model(local_path='/root/.cache/torch/hub/facebookresearch_dinov2_main'):
    """Load the DINOv2 model defined by MODEL_NAME and return it in eval mode.

    Args:
        local_path: Directory of a locally cached ``torch.hub`` checkout of
            facebookresearch/dinov2. Defaults to the standard hub cache path;
            when the directory is missing, falls back to online loading.

    Returns:
        The DINOv2 model, moved to DEVICE and switched to ``eval()``.
    """
    print(f"🚀 [系统] 初始化 DINOv2 ({MODEL_NAME})...")
    if DEVICE == "cuda":
        print(f"✅ [硬件确认] 正在使用显卡: {torch.cuda.get_device_name(0)}")
        print(f" (显存状态: {torch.cuda.memory_allocated()/1024**2:.2f}MB 已用)")
    else:
        print("❌ [警告] 未检测到显卡Giant 模型在 CPU 上会非常慢!")
    # Load from the local hub cache when possible so the repo code is not
    # re-downloaded on every run.
    print(f"📂 [系统] 正在从本地缓存加载代码: {local_path}")
    if os.path.exists(local_path):
        model = torch.hub.load(local_path, MODEL_NAME, source='local')
    else:
        # Fall back to online loading if the cache path is wrong/missing
        # (may fail in offline environments).
        print("⚠️ 本地缓存未找到,尝试在线加载...")
        model = torch.hub.load('facebookresearch/dinov2', MODEL_NAME)
    model.to(DEVICE)
    model.eval()
    return model
def preprocess_for_dino(img_cv):
    """Preprocess a BGR OpenCV image for DINOv2 inference.

    DINOv2 requirements:
      1. Both spatial dimensions must be multiples of the patch size (14).
      2. Standard ImageNet mean/std normalization.

    Args:
        img_cv: HxWx3 BGR image (``numpy.ndarray`` as returned by cv2.imread).

    Returns:
        Tuple ``(tensor, new_h, new_w)`` where ``tensor`` is a [1, 3, new_h,
        new_w] float tensor on DEVICE and ``new_h``/``new_w`` are the aligned
        spatial dimensions.
    """
    h, w = img_cv.shape[:2]
    # Round down to a multiple of 14, but never below one full patch:
    # an input smaller than 14 px would otherwise yield a 0-sized resize
    # target and make cv2.resize raise.
    new_h = max(14, (h // 14) * 14)
    new_w = max(14, (w // 14) * 14)
    img_resized = cv2.resize(img_cv, (new_w, new_h))
    # OpenCV is BGR; PIL/torchvision expect RGB.
    img_pil = Image.fromarray(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return transform(img_pil).unsqueeze(0).to(DEVICE), new_h, new_w
def scan_and_draw(model, t1_path, t2_path, output_path, threshold):
    """Detect changed regions between two images and render an annotated result.

    Computes per-patch cosine similarity between DINOv2 patch tokens of the
    baseline image (t1_path) and the current image (t2_path), converts it to a
    difference heatmap, overlays the heatmap on the current image, and draws
    bounding boxes (with mean-difference labels) around regions whose
    normalized difference exceeds ``threshold``.

    Side effects: writes the annotated image to ``output_path`` and a raw
    grayscale heatmap to ``debug_raw_heatmap.png``; prints progress/statistics.

    Args:
        model: DINOv2 model exposing ``forward_features`` (from init_model).
        t1_path: Path to the baseline image.
        t2_path: Path to the current image (defines the reference resolution).
        output_path: Where the annotated result image is saved.
        threshold: Detection threshold in [0.0, 1.0] applied to the
            visualization-normalized heatmap.

    Returns:
        None. Returns early (after printing an error) if either image fails
        to load.
    """
    # 1. Read both images with OpenCV (BGR).
    img1_cv = cv2.imread(t1_path)
    img2_cv = cv2.imread(t2_path)
    if img1_cv is None or img2_cv is None:
        print("❌ 错误: 无法读取图片")
        return
    # Force-resize the baseline to match the current image (T2 is reference).
    h_orig, w_orig = img2_cv.shape[:2]
    img1_cv = cv2.resize(img1_cv, (w_orig, h_orig))
    print(f"🔪 [处理] DINOv2 扫描... 原始尺寸: {w_orig}x{h_orig}")
    # 2. Whole-image preprocessing (no sliding-window tiling needed for DINO);
    # sizes are trimmed to multiples of 14 to match the ViT patch grid.
    t1_tensor, h_align, w_align = preprocess_for_dino(img1_cv)
    t2_tensor, _, _ = preprocess_for_dino(img2_cv)
    print(f"🧠 [推理] Giant Model 计算中 (Patch网格: {h_align//14}x{w_align//14})...")
    with torch.no_grad():
        # DINOv2 forward pass, extracting patch tokens.
        # feat shape: [1, N_patches, 1536] (1536 is the Giant embedding dim).
        feat1 = model.forward_features(t1_tensor)["x_norm_patchtokens"]
        feat2 = model.forward_features(t2_tensor)["x_norm_patchtokens"]
        # Per-patch cosine similarity -> [1, N_patches]
        similarity = F.cosine_similarity(feat1, feat2, dim=-1)
    # 3. Build heatmap data: reshape back to the 2-D patch grid.
    grid_h, grid_w = h_align // 14, w_align // 14
    sim_map = similarity.reshape(grid_h, grid_w).cpu().numpy()
    # Similarity -> difference (diff = 1 - sim).
    heatmap_raw = 1.0 - sim_map
    # Upscale the coarse patch grid back to the original resolution so it can
    # be overlaid on the original image.
    heatmap_avg = cv2.resize(heatmap_raw, (w_orig, h_orig), interpolation=cv2.INTER_CUBIC)
    # Statistics (logic unchanged).
    min_v, max_v = heatmap_avg.min(), heatmap_avg.max()
    print(f"\n📊 [统计] 差异分布: Min={min_v:.4f} | Max={max_v:.4f} | Mean={heatmap_avg.mean():.4f}")
    # ==========================================
    # Save the raw (min-max normalized) grayscale heatmap for debugging.
    # ==========================================
    raw_norm = (heatmap_avg - min_v) / (max_v - min_v + 1e-6)
    cv2.imwrite("debug_raw_heatmap.png", (raw_norm * 255).astype(np.uint8))
    print(f"💾 [调试] 原始热力图已保存: debug_raw_heatmap.png")
    # ==========================================
    # 5. Visualization post-processing.
    # ==========================================
    # DINO differences are usually within 0~1; stretch dynamically for display.
    # A floor of 0.4 on the denominator keeps tiny differences from being
    # amplified into visual noise.
    norm_factor = max(max_v, 0.4)
    heatmap_vis = (heatmap_avg / norm_factor * 255).clip(0, 255).astype(np.uint8)
    # Color mapping.
    heatmap_color = cv2.applyColorMap(heatmap_vis, cv2.COLORMAP_JET)
    # Blend heatmap over the current image.
    alpha = 0.4
    blended_img = cv2.addWeighted(img2_cv, alpha, heatmap_color, 1.0 - alpha, 0)
    # Threshold filtering and box drawing (logic unchanged).
    _, thresh_img = cv2.threshold(heatmap_vis, int(255 * threshold), 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    result_img = blended_img.copy()
    box_count = 0
    # Minimum region size as a fraction of the image area (no patch-size
    # parameter needed for DINO full-image inference).
    min_area = (w_orig * h_orig) * 0.005  # 0.5% of the area
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > min_area:
            box_count += 1
            x, y, bw, bh = cv2.boundingRect(cnt)
            # Draw box (thick white outline + thin red inner line).
            cv2.rectangle(result_img, (x, y), (x+bw, y+bh), (255, 255, 255), 4)
            cv2.rectangle(result_img, (x, y), (x+bw, y+bh), (0, 0, 255), 2)
            # Score label: mean difference inside the region.
            region_score = heatmap_avg[y:y+bh, x:x+bw].mean()
            label = f"{region_score:.2f}"
            # Label background and text.
            cv2.rectangle(result_img, (x, y-25), (x+80, y), (0,0,255), -1)
            cv2.putText(result_img, label, (x+5, y-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,255), 2)
    # Save the final result.
    cv2.imwrite(output_path, result_img)
    print("="*40)
    print(f"🎯 扫描完成! 发现区域: {box_count}")
    print(f"🖼️ 结果已保存至: {output_path}")
    print("="*40)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="DINOv2 Giant 违建热力图检测 (结构敏感版)")
    parser.add_argument("t1", help="基准图")
    parser.add_argument("t2", help="现状图")
    parser.add_argument("out", nargs="?", default="dino_result.jpg", help="输出图片名")
    # Legacy crop/step/batch flags are accepted for CLI compatibility with the
    # previous sliding-window tool, but DINOv2 full-image inference ignores them.
    parser.add_argument("-c", "--crop", type=int, default=224, help="(已忽略) DINOv2 全图推理")
    parser.add_argument("-s", "--step", type=int, default=0, help="(已忽略) DINOv2 全图推理")
    parser.add_argument("-b", "--batch", type=int, default=16, help="(已忽略) DINOv2 全图推理")
    # Core parameter. DINO cosine differences are typically smaller than
    # DreamSim's, so a lower threshold (around 0.25-0.35) is recommended.
    parser.add_argument("--thresh", type=float, default=0.30, help="检测阈值 (0.0-1.0)")
    args = parser.parse_args()
    # Initialize the model and run the scan.
    model = init_model()
    scan_and_draw(model, args.t1, args.t2, args.out, args.thresh)