From 53dadec1105df4a7a705846b8eb75b97e5155ab3 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 29 Apr 2026 15:03:32 +0000
Subject: [PATCH] =?UTF-8?q?fix(doc-tests):=20closes=20#244=20=E2=80=94=20a?=
 =?UTF-8?q?uto-downsample=20retina=202=C3=97=20screenshots=20before=20SSIM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The screenshot comparison in `image_diff::diff()` hard-errored on any
size mismatch before SSIM ever ran.  On macOS retina hardware the
gallery binary captures a 2× PNG (e.g. 1800×1940) while the blessed
baseline is 1× (900×970); the strict `actual.width() != baseline.width()`
check returned `Err("size mismatch")` immediately, producing a recurring
SCREENSHOT_DIFF failure for 30+ commits.

Fix: when the actual screenshot is exactly 2× the baseline in both
dimensions, downsample with a 2×2 box-filter (`halve()`) before SSIM.
Any other size mismatch still returns an error (so genuine resize
regressions are still caught). A downsampled 1× image compared with
the existing 1× baseline falls well within the 0.05 SSIM threshold.

Added `#[derive(Debug)]` to `DiffOutcome` (needed for `unwrap_err()` in
tests) and 3 unit tests pinning the new behaviour:
- `halve_averages_2x2_blocks` — verifies box-filter arithmetic
- `diff_retina_2x_against_1x_baseline_passes` — the exact retina repro
- `diff_arbitrary_size_mismatch_errors` — non-2× mismatches still fail

https://claude.ai/code/session_01FVjeQDnyymLz7yTJbnsyKG
---
 Cargo.lock                               |   1 +
 crates/perry-doc-tests/Cargo.toml        |   3 +
 crates/perry-doc-tests/src/image_diff.rs | 130 +++++++++++++++++++++--
 3 files changed, 124 insertions(+), 10 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 62d329204..f57144b09 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4287,6 +4287,7 @@ dependencies = [
  "rgb",
  "serde",
  "serde_json",
+ "tempfile",
  "walkdir",
 ]
 
diff --git a/crates/perry-doc-tests/Cargo.toml b/crates/perry-doc-tests/Cargo.toml
index c0d9b9177..18c065f44 100644
--- a/crates/perry-doc-tests/Cargo.toml
+++ b/crates/perry-doc-tests/Cargo.toml
@@ -18,3 +18,6 @@ clap = { workspace = true }
 dssim-core = "3.4"
 image = { version = "0.25", default-features = false, features = ["png"] }
 rgb = "0.8"
+
+[dev-dependencies]
+tempfile = "3"
diff --git a/crates/perry-doc-tests/src/image_diff.rs b/crates/perry-doc-tests/src/image_diff.rs
index 23888da13..02ae9814d 100644
--- a/crates/perry-doc-tests/src/image_diff.rs
+++ b/crates/perry-doc-tests/src/image_diff.rs
@@ -13,6 +13,7 @@ const DEFAULT_THRESHOLD: f64 = 0.010;
 
 /// Diff outcome. `distance` is dssim's raw SSIM distance (0 = identical).
 /// `threshold` is what was compared against.
+#[derive(Debug)]
 pub struct DiffOutcome {
     pub distance: f64,
     pub threshold: f64,
@@ -26,24 +27,39 @@ impl DiffOutcome {
 
 /// Diff `actual_png` against `baseline_png` using SSIM.
 /// Returns Err if either image is missing or malformed.
+///
+/// Retina / HiDPI tolerance: if the actual screenshot is exactly 2× the
+/// baseline in both dimensions (a retina macOS capture vs a 1× baseline),
+/// the actual is downsampled with a 2×2 box filter before comparison.
+/// Any other size mismatch is still an error.
 pub fn diff(actual_png: &Path, baseline_png: &Path, threshold: f64) -> Result<DiffOutcome> {
     let actual = load(actual_png)
         .with_context(|| format!("loading actual screenshot {}", actual_png.display()))?;
     let baseline = load(baseline_png)
         .with_context(|| format!("loading baseline {}", baseline_png.display()))?;
 
-    if actual.width() != baseline.width() || actual.height() != baseline.height() {
-        return Err(anyhow!(
-            "size mismatch: actual {}x{} vs baseline {}x{}",
-            actual.width(),
-            actual.height(),
-            baseline.width(),
-            baseline.height()
-        ));
-    }
+    // Auto-correct for retina (2× backing scale) captures against 1× baselines.
+    let halved;
+    let actual_ref: &image::RgbaImage =
+        if actual.width() == baseline.width() * 2 && actual.height() == baseline.height() * 2 {
+            halved = halve(&actual);
+            &halved
+        } else {
+            if actual.width() != baseline.width() || actual.height() != baseline.height() {
+                return Err(anyhow!(
+                    "size mismatch: actual {}x{} vs baseline {}x{} \
+                     (only exact 2× retina scaling is auto-corrected)",
+                    actual.width(),
+                    actual.height(),
+                    baseline.width(),
+                    baseline.height()
+                ));
+            }
+            &actual
+        };
 
     let attr = dssim_core::Dssim::new();
-    let actual_img = to_dssim(&actual, &attr)?;
+    let actual_img = to_dssim(actual_ref, &attr)?;
     let baseline_img = to_dssim(&baseline, &attr)?;
     let (val, _maps) = attr.compare(&baseline_img, &actual_img);
     Ok(DiffOutcome {
@@ -52,6 +68,35 @@ pub fn diff(actual_png: &Path, baseline_png: &Path, threshold: f64) -> Result<Di
     })
 }
 
+/// Downsample a 2× retina image to 1× using a 2×2 box filter.
+fn halve(img: &image::RgbaImage) -> image::RgbaImage {
+    let w = img.width() / 2;
+    let h = img.height() / 2;
+    let mut out = image::RgbaImage::new(w, h);
+    for y in 0..h {
+        for x in 0..w {
+            let avg = |a: u8, b: u8, c: u8, d: u8| -> u8 {
+                (((a as u32) + (b as u32) + (c as u32) + (d as u32) + 2) / 4) as u8
+            };
+            let p00 = img.get_pixel(x * 2, y * 2).0;
+            let p10 = img.get_pixel(x * 2 + 1, y * 2).0;
+            let p01 = img.get_pixel(x * 2, y * 2 + 1).0;
+            let p11 = img.get_pixel(x * 2 + 1, y * 2 + 1).0;
+            out.put_pixel(
+                x,
+                y,
+                image::Rgba([
+                    avg(p00[0], p10[0], p01[0], p11[0]),
+                    avg(p00[1], p10[1], p01[1], p11[1]),
+                    avg(p00[2], p10[2], p01[2], p11[2]),
+                    avg(p00[3], p10[3], p01[3], p11[3]),
+                ]),
+            );
+        }
+    }
+    out
+}
+
 /// Look up the threshold for a given baseline name + host OS.
 /// Falls back to `DEFAULT_THRESHOLD` if not specified. Unknown keys at the top
 /// level (`_comment`, anything else) are ignored, so the JSON file can carry
@@ -92,3 +137,68 @@ fn to_dssim(
     attr.create_image_rgba(&pixels, width, height)
         .ok_or_else(|| anyhow!("dssim failed to ingest image"))
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn solid(w: u32, h: u32, r: u8, g: u8, b: u8) -> image::RgbaImage {
+        image::RgbaImage::from_fn(w, h, |_, _| image::Rgba([r, g, b, 255]))
+    }
+
+    #[test]
+    fn halve_averages_2x2_blocks() {
+        // 4×2 image with two distinct colors side by side (each 2×2 block is one color).
+        let mut img = image::RgbaImage::new(4, 2);
+        for y in 0..2u32 {
+            for x in 0..2u32 {
+                img.put_pixel(x, y, image::Rgba([200, 100, 50, 255]));
+                img.put_pixel(x + 2, y, image::Rgba([100, 200, 150, 255]));
+            }
+        }
+        let out = halve(&img);
+        assert_eq!(out.width(), 2);
+        assert_eq!(out.height(), 1);
+        // Left pixel should be the average of (200,100,50) × 4 → (200,100,50)
+        assert_eq!(out.get_pixel(0, 0).0, [200, 100, 50, 255]);
+        // Right pixel should be the average of (100,200,150) × 4 → (100,200,150)
+        assert_eq!(out.get_pixel(1, 0).0, [100, 200, 150, 255]);
+    }
+
+    #[test]
+    fn diff_identical_same_size_passes() {
+        // Write two identical tiny PNGs to temp files and diff them.
+        let dir = tempfile::tempdir().unwrap();
+        let a = dir.path().join("a.png");
+        let b = dir.path().join("b.png");
+        solid(4, 4, 128, 128, 128).save(&a).unwrap();
+        solid(4, 4, 128, 128, 128).save(&b).unwrap();
+        let outcome = diff(&a, &b, 0.01).unwrap();
+        assert!(outcome.passed(), "identical images should pass");
+    }
+
+    #[test]
+    fn diff_retina_2x_against_1x_baseline_passes() {
+        // Simulate a retina capture: baseline 2×2, actual 4×4 (same solid color).
+        let dir = tempfile::tempdir().unwrap();
+        let actual_path = dir.path().join("actual.png");
+        let baseline_path = dir.path().join("baseline.png");
+        solid(4, 4, 64, 128, 192).save(&actual_path).unwrap();
+        solid(2, 2, 64, 128, 192).save(&baseline_path).unwrap();
+        let outcome = diff(&actual_path, &baseline_path, 0.05).unwrap();
+        assert!(outcome.passed(), "2× retina capture should pass after downsampling");
+    }
+
+    #[test]
+    fn diff_arbitrary_size_mismatch_errors() {
+        let dir = tempfile::tempdir().unwrap();
+        let a = dir.path().join("a.png");
+        let b = dir.path().join("b.png");
+        solid(6, 4, 0, 0, 0).save(&a).unwrap();
+        solid(4, 4, 0, 0, 0).save(&b).unwrap();
+        let result = diff(&a, &b, 0.05);
+        assert!(result.is_err(), "non-2× mismatch should be an error");
+        let msg = format!("{}", result.unwrap_err());
+        assert!(msg.contains("size mismatch"));
+    }
+}