Source code for esrf_data_compressor.checker.ssim

# src/esrf_data_compressor/checker/ssim.py

import os
import numpy as np
import h5py
from skimage.metrics import structural_similarity as ssim


def _select_win_size(H: int, W: int) -> int:
    """
    Choose an odd, valid window size for SSIM given slice dimensions H×W.
    win_size = min(H, W, 7), made odd, at least 3.
    """
    win = min(H, W, 7)
    if win % 2 == 0:
        win -= 1
    return max(win, 3)


def compute_ssim_for_dataset_pair(
    orig_path: str, comp_path: str, dataset_relpath: str
) -> tuple[float, float]:
    """
    Compute SSIM on the first and last slices of one 3D dataset.

    Both HDF5 files are opened read-only and the dataset at
    `dataset_relpath` (e.g., 'entry_0000/ESRF-ID11/marana/data') is looked
    up in each. Only the first (z=0) and last (z=Z-1) slices are read; they
    are converted to float64 before comparison.

    Parameters:
        orig_path: path of the original HDF5 file.
        comp_path: path of the compressed HDF5 file.
        dataset_relpath: path of the dataset inside both files.

    Returns:
        (ssim_first, ssim_last).

        For a non-constant original slice, SSIM uses the original slice's
        value range as `data_range`. For a constant original slice, the
        result is 1.0 only when the compressed slice holds the same constant;
        otherwise SSIM is computed over the combined value range of both
        slices, so a differing compressed slice is not reported as perfect.

    Raises:
        IndexError: if either dataset is not 3D.
        ValueError: if the compared slices do not have the same shape.
    """
    with h5py.File(orig_path, "r") as fo, h5py.File(comp_path, "r") as fc:
        ds_o = fo[dataset_relpath]
        ds_c = fc[dataset_relpath]

        # Ensure both datasets are 3D
        if ds_o.ndim != 3 or ds_c.ndim != 3:
            raise IndexError(
                f"Dataset '{dataset_relpath}' is not 3D (orig: {ds_o.ndim}D, comp: {ds_c.ndim}D)"
            )

        first_o = ds_o[0].astype(np.float64)
        last_o = ds_o[-1].astype(np.float64)
        first_c = ds_c[0].astype(np.float64)
        last_c = ds_c[-1].astype(np.float64)

    # The slices are in memory now; the files are no longer needed.
    H, W = first_o.shape
    win = _select_win_size(H, W)

    def _slice_ssim(a: np.ndarray, b: np.ndarray) -> float:
        # Check shapes up front so the constant-slice shortcut below can
        # never hide a shape mismatch.
        if a.shape != b.shape:
            raise ValueError(
                f"Slice shapes differ (orig: {a.shape}, comp: {b.shape})"
            )

        amin, amax = a.min(), a.max()
        if amax != amin:
            return float(ssim(a, b, data_range=amax - amin, win_size=win))

        # Constant original: its own range is zero, which cannot be used as
        # data_range. Widen the range with the compressed slice instead.
        lo = min(amin, b.min())
        hi = max(amax, b.max())
        if hi == lo:
            # Both slices are the same constant value.
            return 1.0
        return float(ssim(a, b, data_range=hi - lo, win_size=win))

    s0 = _slice_ssim(first_o, first_c)
    s1 = _slice_ssim(last_o, last_c)
    return s0, s1


def compute_ssim_for_file_pair(orig_path: str, comp_path: str) -> tuple[str, list[str]]:
    """
    Build an SSIM report for every 3D dataset found in `orig_path`.

    The original file is scanned for datasets with three dimensions; each
    one is then compared against the dataset at the same path in
    `comp_path`.

    Returns (basename, [report_lines…]) where basename is the file name of
    `orig_path` and each report line is one of:
        "<dataset_relpath>: SSIM_first=… SSIM_last=…"
        "<dataset_relpath>: ERROR computing SSIM: …"
    or, when nothing qualifies, a single "No 3D datasets found in …" line.
    """
    basename = os.path.basename(orig_path)
    volume_paths: list[str] = []

    def _remember_3d(name, node):
        # Returns None implicitly; only rank-3 datasets are recorded.
        is_volume = isinstance(node, h5py.Dataset) and node.ndim == 3
        if is_volume:
            volume_paths.append(name)

    with h5py.File(orig_path, "r") as source:
        source.visititems(_remember_3d)

    if not volume_paths:
        return basename, [f"No 3D datasets found in {basename}"]

    report_lines: list[str] = []
    for relpath in volume_paths:
        # A failure on one dataset becomes a report line instead of
        # aborting the remaining datasets.
        try:
            first, last = compute_ssim_for_dataset_pair(orig_path, comp_path, relpath)
            line = f"{relpath}: SSIM_first={first:.4f} SSIM_last={last:.4f}"
        except Exception as exc:
            line = f"{relpath}: ERROR computing SSIM: {exc}"
        report_lines.append(line)

    return basename, report_lines