Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
ea2c971
split r2r
seanmcculloch Feb 3, 2026
21ef9db
detection dev
seanmcculloch Feb 5, 2026
2cd7a72
fix: split dataset xml generation fix timepoints and calibration tran…
seanmcculloch Feb 11, 2026
d4174fc
chore: clean diff
seanmcculloch Feb 11, 2026
8116fc5
fix: update split xml_to_dataframe with better zarr parser
seanmcculloch Feb 11, 2026
0ae1180
feat: split xml ipd
seanmcculloch Feb 13, 2026
f455069
chore: typo
seanmcculloch Feb 15, 2026
f2ce5f8
docs: inline comment on channel parsing
seanmcculloch Feb 15, 2026
e35375d
chore: .format to f string
seanmcculloch Feb 15, 2026
128cf75
chore: Use os.path.join for path construction in metadata_builder (#168)
Copilot Feb 15, 2026
0e97e1a
test: Add crop bounds validation for split XML IPD implementation (#169)
Copilot Feb 15, 2026
6ec7da7
feat(evaluation): add detection_qc module for IP detection sweep anal…
seanmcculloch Feb 16, 2026
7ffe91d
chore: debug prints
seanmcculloch Feb 16, 2026
1f65983
debug: add diagnostic prints for multiscale loading in IP detection
seanmcculloch Feb 16, 2026
f8389fa
debug: move print to ImageReader.fetch_image_data for visibility
seanmcculloch Feb 16, 2026
af2ae36
fix: prevent double-appending multiscale level in OME-Zarr paths
seanmcculloch Feb 16, 2026
bf07b3c
fix: handle zarr arrays without .keys() in diagnostic print
seanmcculloch Feb 16, 2026
e37f72a
fix: add root zarr inspection to diagnose missing multiscale levels
seanmcculloch Feb 16, 2026
e682956
fix: include per-tile zarr path in split tile multiscale resolution
seanmcculloch Feb 16, 2026
0ba7ef7
debug: add detailed diagnostics for image loading and DoG detection
seanmcculloch Feb 16, 2026
8411eb9
fix: scale interval bounds for multiscale zarr levels
seanmcculloch Feb 16, 2026
58e4e50
refactor: remove verbose diagnostics from production code
seanmcculloch Feb 16, 2026
dac2275
feat(diagnostics): Add comprehensive logging for multiscale IP detect…
seanmcculloch Feb 16, 2026
cf780bc
feat(diagnostics): Add Ray task-level logging with flush
seanmcculloch Feb 16, 2026
ef231c8
fix(image_reader): Allow crop_max to equal array dimension - 1
seanmcculloch Feb 16, 2026
97fda2c
fix: clamp crop_max bounds to valid array dimensions
seanmcculloch Feb 16, 2026
cb0d311
fix: apply split tile crop AFTER downsampling and bounds scaling
seanmcculloch Feb 16, 2026
abb2a27
docs: clarify crop_max scaling may produce out-of-bounds values
seanmcculloch Feb 16, 2026
df8970d
fix: explicitly init and shutdown Ray with OS CPU count
seanmcculloch Feb 17, 2026
8b84714
fix: scale crop bounds by total downsampling factor (2^level * dsxy)
seanmcculloch Feb 17, 2026
c14252d
fix: use /scratch/ray/ as Ray temp directory to avoid filling /tmp
seanmcculloch Feb 17, 2026
1f25fdf
fixes for detection of unsplit xml
seanmcculloch Apr 1, 2026
c3635fd
matching and solver working, skip global optimization
seanmcculloch Apr 2, 2026
9214667
remove _ch_ restriction
seanmcculloch Apr 10, 2026
8c4e963
add retry
seanmcculloch Apr 22, 2026
9ad5a92
checkpoint
seanmcculloch Apr 24, 2026
98ea077
Fix split-image transform handling in matching
seanmcculloch Apr 24, 2026
2da34eb
translation metadata
seanmcculloch Apr 24, 2026
ca31c88
tighten chunk-overlap
seanmcculloch Apr 28, 2026
47dd0cc
checkpoint
seanmcculloch May 5, 2026
aed99a3
pinned versions
seanmcculloch May 6, 2026
51930df
fix double offset of IPs when chunks_per_bound greater than 1
seanmcculloch May 6, 2026
826582e
fix chunks_per_bound double offset
seanmcculloch May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ ipython_config.py
# install all needed dependencies.
#Pipfile.lock

# uv
uv.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

Expand Down
109 changes: 100 additions & 9 deletions Rhapso/data_prep/xml_to_dataframe.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pandas as pd
import xml.etree.ElementTree as ET
import re

# This component recieves an XML file containing Tiff or Zarr image metadata and converts
# This component receives an XML file containing Tiff or Zarr image metadata and converts
# it into several Dataframes

class XMLToDataFrame:
Expand All @@ -17,9 +18,17 @@ def parse_image_loader_zarr(self, root):

for il in root.findall(".//ImageLoader/zgroups/zgroup"):
view_setup = il.get("setup")
timepoint = il.get("timepoint")
file_path = il.find("path").text if il.find("path") is not None else None
channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0]
timepoint = il.get("tp") or il.get("timepoint")
file_path = il.get("path")
if file_path is None:
element_string = ET.tostring(il, encoding='unicode')
raise ValueError(f"zgroup element missing 'path' attribute: {element_string}")

# default to channel 0 if not parseable from the path name
try:
channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0]
except (IndexError, AttributeError):
channel = 0

image_loader_data.append(
{
Expand Down Expand Up @@ -75,17 +84,99 @@ def parse_image_loader_tiff(self, root):
# Convert the list to a DataFrame and return
return pd.DataFrame(image_loader_data)

def parse_image_loader_split_zarr(self):
pass
def parse_image_loader_split_zarr(self, root):
    """
    Parses a split.viewerimgloader XML structure where a single source image is virtually
    subdivided into overlapping tiles via SetupIdDefinitions.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML.

    Returns
    -------
    pd.DataFrame
        One row per split tile with columns: view_setup, timepoint, series, channel,
        file_path, crop_min, crop_max, zarr_base_path.

    Raises
    ------
    ValueError
        If the expected split.viewerimgloader structure is missing or malformed:
        no outer/inner ImageLoader, no <zarr> base path, a SetupIdDefinition with
        missing children, or an OldId with no matching zgroup.
    """
    outer_loader = root.find(".//ImageLoader[@format='split.viewerimgloader']")
    if outer_loader is None:
        raise ValueError(
            "split.viewerimgloader ImageLoader node not found in XML; "
            "ensure the XML contains an ImageLoader with format='split.viewerimgloader'."
        )

    inner_loader = outer_loader.find("ImageLoader")
    if inner_loader is None:
        raise ValueError(
            "Nested ImageLoader node not found inside split.viewerimgloader configuration."
        )

    zarr_elem = inner_loader.find("zarr")
    if zarr_elem is None or zarr_elem.text is None:
        raise ValueError(
            "<zarr> node with base path is missing from split.viewerimgloader configuration."
        )

    zarr_base_path = zarr_elem.text.strip()

    # Build lookup from source setup id to (timepoint, zgroup_path).
    # Skip zgroups without a 'setup' attribute so the lookup keys remain
    # sortable strings (sorted() below would otherwise raise on None).
    zgroup_lookup = {}
    for zg in inner_loader.findall(".//zgroups/zgroup"):
        setup = zg.get("setup")
        if setup is None:
            continue
        tp = zg.get("tp") or zg.get("timepoint")
        path = zg.get("path")
        zgroup_lookup[setup] = (tp, path)

    image_loader_data = []
    for sid in outer_loader.findall(".//SetupIds/SetupIdDefinition"):
        # Guard against malformed definitions: a missing child element would
        # otherwise surface as an opaque AttributeError on NoneType.
        children = {}
        for tag in ("NewId", "OldId", "min", "max"):
            elem = sid.find(tag)
            if elem is None or elem.text is None:
                element_string = ET.tostring(sid, encoding='unicode')
                raise ValueError(
                    f"SetupIdDefinition missing required <{tag}> element: {element_string}"
                )
            children[tag] = elem.text.strip()

        new_id = children["NewId"]
        old_id = children["OldId"]
        crop_min = children["min"]
        crop_max = children["max"]

        if old_id not in zgroup_lookup:
            raise ValueError(
                f"SetupIdDefinition refers to OldId {old_id!r} that is not present in the "
                f"inner loader's zgroups. Available setup ids: {sorted(zgroup_lookup.keys())}"
            )
        tp, zgroup_path = zgroup_lookup[old_id]

        # Attempt to extract the channel from the path, assuming filenames include '_ch_<number>'
        # (e.g. both '.zarr' and '.ome.zarr' variants). If this pattern is not present or is
        # formatted differently, we deliberately fall back to channel 0 as a default.
        # (`or ""` guards a zgroup whose 'path' attribute is absent.)
        channel_match = re.search(r'_ch_(\d+)', zgroup_path or "")
        if channel_match:
            channel = channel_match.group(1)
        else:
            # Default to channel 0 when channel information cannot be parsed from the path.
            channel = 0

        image_loader_data.append({
            "view_setup": new_id,
            "timepoint": tp,
            "series": 1,
            "channel": channel,
            "file_path": zgroup_path,
            "crop_min": crop_min,
            "crop_max": crop_max,
            "zarr_base_path": zarr_base_path,
        })

    return pd.DataFrame(image_loader_data)

def route_image_loader(self, root):
    """
    Directs the XML parsing process based on the image loader format specified in the XML.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML.

    Returns
    -------
    pd.DataFrame
        Output of the format-specific parser (split zarr, tiff filemap, or plain zarr).

    Raises
    ------
    ValueError
        If no <ImageLoader> element is present in the XML.
    """
    format_node = root.find(".//ImageLoader")
    # Guard BEFORE reading the attribute: a missing ImageLoader must raise a
    # clear ValueError, not an AttributeError on None.
    if format_node is None:
        raise ValueError("No <ImageLoader> element found in XML; cannot determine image loader format.")

    # Lower-case and default to "" so substring routing is case-insensitive
    # and tolerant of a missing 'format' attribute.
    format_type = (format_node.get("format") or "").lower()
    if "split" in format_type:
        return self.parse_image_loader_split_zarr(root)
    elif "filemap" in format_type:
        return self.parse_image_loader_tiff(root)
    else:
        return self.parse_image_loader_zarr(root)
Expand All @@ -96,7 +187,7 @@ def parse_view_setups(self, root):
"""
viewsetups_data = []

for vs in root.findall(".//ViewSetup"):
for vs in root.findall("./SequenceDescription/ViewSetups/ViewSetup"):
id_ = vs.find("id").text
# name = vs.find("name").text
name = vs.findtext("name")
Expand Down
4 changes: 3 additions & 1 deletion Rhapso/detection/advanced_refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ def filter(self):
lb = row['lower_bound']
ub = row['upper_bound']
if vid == view_id:
to_process_interval = (lb, ub)

ub_inclusive = (ub[0]+1, ub[1]+1, ub[2]+1)
to_process_interval = (lb, ub_inclusive)
ips_block = []
intensities_block = []

Expand Down
49 changes: 46 additions & 3 deletions Rhapso/detection/difference_of_gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@
"""

class DifferenceOfGaussian:
def __init__(self, min_intensity, max_intensity, sigma, threshold, median_filter, mip_map_downsample):
def __init__(self, min_intensity, max_intensity, sigma, threshold, median_filter, mip_map_downsample,
min_peak_intensity=None):
self.min_intensity = min_intensity
self.max_intensity = max_intensity
self.sigma = sigma
self.threshold = threshold
self.median_filter = median_filter
self.mip_map_downsample = mip_map_downsample
# Optional post-detection filter: drop peaks whose RAW (pre-background-
# subtraction) image intensity at the localized peak is below this
# threshold. None disables. Targets zero-padded borders in stitched
# / fused volumes that DoG latches onto as edge features.
self.min_peak_intensity = min_peak_intensity

def apply_offset(self, peaks, offset_z):
"""
Expand Down Expand Up @@ -305,19 +311,56 @@ def run(self, image_chunk, offset, lb):
"""
Executes the entry point of the script.
"""
# Keep a reference to the RAW chunk before background subtraction so
# the optional min_peak_intensity filter can sample true voxel
# values (zero-padded borders read 0, not the post-subtract residual).
raw_image_chunk = image_chunk
image_chunk = self.background_subtract_xy(image_chunk)
peaks = self.compute_difference_of_gaussian(image_chunk)
print(f"[DoG] image_chunk.shape={image_chunk.shape}, detected_peaks={len(peaks)}, offset={offset}, lb={lb}")

if peaks.size == 0:
intensities = np.empty((0,), dtype=image_chunk.dtype)
final_peaks = peaks

else:
intensities = map_coordinates(image_chunk, peaks.T, order=1, mode='reflect')
final_peaks = self.apply_lower_bounds(peaks, lb)
if self.min_peak_intensity is not None:
# Reject peaks generated by a nearby data→0 step (the
# zero-border artifact in fused/stitched volumes). The DoG
# response peak from an edge step lands ~sigma INSIDE the
# bright side, where the raw value at the peak itself is
# positive — sampling only at the peak coord is therefore
# not enough. Check a (2r+1)^3 neighborhood and reject if
# the local minimum is below threshold (i.e. a zero voxel
# is in the support window). r = ceil(sigma) is enough to
# reach the step that generated the response.
r = max(1, int(np.ceil(float(self.sigma))))
peaks_int = np.rint(peaks).astype(np.int32)
shape = np.asarray(raw_image_chunk.shape, dtype=np.int32)
lo = np.clip(peaks_int - r, 0, shape - 1)
hi = np.clip(peaks_int + r + 1, 1, shape)
keep = np.ones(len(peaks_int), dtype=bool)
thr = float(self.min_peak_intensity)
for i in range(len(peaks_int)):
z0, y0, x0 = lo[i]
z1, y1, x1 = hi[i]
if raw_image_chunk[z0:z1, y0:y1, x0:x1].min() < thr:
keep[i] = False
n_dropped = int((~keep).sum())
if n_dropped:
print(
f"[DoG] min_peak_intensity={self.min_peak_intensity} "
f"(neighborhood r={r}): dropped {n_dropped}/{len(peaks)} "
f"peaks adjacent to sub-threshold voxels"
)
peaks = peaks[keep]
intensities = intensities[keep]
final_peaks = self.upsample_coordinates(peaks)
final_peaks = self.apply_lower_bounds(final_peaks, lb)
final_peaks = self.apply_offset(final_peaks, offset)
final_peaks = self.upsample_coordinates(final_peaks)

print(f"[DoG] final_peaks after transforms={len(final_peaks)}")
return {
'interest_points': final_peaks,
'intensities': intensities
Expand Down
Loading