Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
ea2c971
split r2r
seanmcculloch Feb 3, 2026
21ef9db
detection dev
seanmcculloch Feb 5, 2026
2cd7a72
fix: split dataset xml generation fix timepoints and calibration tran…
seanmcculloch Feb 11, 2026
d4174fc
chore: clean diff
seanmcculloch Feb 11, 2026
8116fc5
fix: update split xml_to_dataframe with better zarr parser
seanmcculloch Feb 11, 2026
0ae1180
feat: split xml ipd
seanmcculloch Feb 13, 2026
f455069
chore: typo
seanmcculloch Feb 15, 2026
f2ce5f8
docs: inline comment on channel parsing
seanmcculloch Feb 15, 2026
e35375d
chore: .format to f string
seanmcculloch Feb 15, 2026
128cf75
chore: Use os.path.join for path construction in metadata_builder (#168)
Copilot Feb 15, 2026
0e97e1a
test: Add crop bounds validation for split XML IPD implementation (#169)
Copilot Feb 15, 2026
6ec7da7
feat(evaluation): add detection_qc module for IP detection sweep anal…
seanmcculloch Feb 16, 2026
7ffe91d
chore: debug prints
seanmcculloch Feb 16, 2026
1f65983
debug: add diagnostic prints for multiscale loading in IP detection
seanmcculloch Feb 16, 2026
f8389fa
debug: move print to ImageReader.fetch_image_data for visibility
seanmcculloch Feb 16, 2026
af2ae36
fix: prevent double-appending multiscale level in OME-Zarr paths
seanmcculloch Feb 16, 2026
bf07b3c
fix: handle zarr arrays without .keys() in diagnostic print
seanmcculloch Feb 16, 2026
e37f72a
fix: add root zarr inspection to diagnose missing multiscale levels
seanmcculloch Feb 16, 2026
e682956
fix: include per-tile zarr path in split tile multiscale resolution
seanmcculloch Feb 16, 2026
0ba7ef7
debug: add detailed diagnostics for image loading and DoG detection
seanmcculloch Feb 16, 2026
8411eb9
fix: scale interval bounds for multiscale zarr levels
seanmcculloch Feb 16, 2026
58e4e50
refactor: remove verbose diagnostics from production code
seanmcculloch Feb 16, 2026
dac2275
feat(diagnostics): Add comprehensive logging for multiscale IP detect…
seanmcculloch Feb 16, 2026
cf780bc
feat(diagnostics): Add Ray task-level logging with flush
seanmcculloch Feb 16, 2026
ef231c8
fix(image_reader): Allow crop_max to equal array dimension - 1
seanmcculloch Feb 16, 2026
97fda2c
fix: clamp crop_max bounds to valid array dimensions
seanmcculloch Feb 16, 2026
cb0d311
fix: apply split tile crop AFTER downsampling and bounds scaling
seanmcculloch Feb 16, 2026
abb2a27
docs: clarify crop_max scaling may produce out-of-bounds values
seanmcculloch Feb 16, 2026
df8970d
fix: explicitly init and shutdown Ray with OS CPU count
seanmcculloch Feb 17, 2026
8b84714
fix: scale crop bounds by total downsampling factor (2^level * dsxy)
seanmcculloch Feb 17, 2026
c14252d
fix: use /scratch/ray/ as Ray temp directory to avoid filling /tmp
seanmcculloch Feb 17, 2026
1f25fdf
fixes for detection of unsplit xml
seanmcculloch Apr 1, 2026
c3635fd
matching and solver working, skip global optimization
seanmcculloch Apr 2, 2026
9214667
remove _ch_ restriction
seanmcculloch Apr 10, 2026
8c4e963
add retry
seanmcculloch Apr 22, 2026
9ad5a92
checkpoint
seanmcculloch Apr 24, 2026
98ea077
Fix split-image transform handling in matching
seanmcculloch Apr 24, 2026
2da34eb
translation metadata
seanmcculloch Apr 24, 2026
ca31c88
tighten chunk-overlap
seanmcculloch Apr 28, 2026
47dd0cc
checkpoint
seanmcculloch May 5, 2026
aed99a3
pinned versions
seanmcculloch May 6, 2026
51930df
fix double offset of IPs when chunks_per_bound greater than 1
seanmcculloch May 6, 2026
826582e
fix chunks_per_bound double offset
seanmcculloch May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ ipython_config.py
# install all needed dependencies.
#Pipfile.lock

# uv
uv.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

Expand Down
109 changes: 100 additions & 9 deletions Rhapso/data_prep/xml_to_dataframe.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pandas as pd
import xml.etree.ElementTree as ET
import re

# This component recieves an XML file containing Tiff or Zarr image metadata and converts
# This component receives an XML file containing Tiff or Zarr image metadata and converts
# it into several Dataframes

class XMLToDataFrame:
Expand All @@ -17,9 +18,17 @@ def parse_image_loader_zarr(self, root):

for il in root.findall(".//ImageLoader/zgroups/zgroup"):
view_setup = il.get("setup")
timepoint = il.get("timepoint")
file_path = il.find("path").text if il.find("path") is not None else None
channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0]
timepoint = il.get("tp") or il.get("timepoint")
file_path = il.get("path")
if file_path is None:
element_string = ET.tostring(il, encoding='unicode')
raise ValueError(f"zgroup element missing 'path' attribute: {element_string}")

# default to channel 0 if not parseable from the path name
try:
channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0]
except (IndexError, AttributeError):
channel = 0

image_loader_data.append(
{
Expand Down Expand Up @@ -75,17 +84,99 @@ def parse_image_loader_tiff(self, root):
# Convert the list to a DataFrame and return
return pd.DataFrame(image_loader_data)

def parse_image_loader_split_zarr(self):
pass
def parse_image_loader_split_zarr(self, root):
    """
    Parses a split.viewerimgloader XML structure where a single source image is virtually
    subdivided into overlapping tiles via SetupIdDefinitions.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML.

    Returns
    -------
    pd.DataFrame
        One row per split tile with columns: view_setup, timepoint, series, channel,
        file_path, crop_min, crop_max, zarr_base_path.

    Raises
    ------
    ValueError
        If the expected split.viewerimgloader structure is missing or malformed:
        no outer/inner ImageLoader, no <zarr> base path, a SetupIdDefinition with
        missing children, or an OldId with no matching zgroup.
    """
    outer_loader = root.find(".//ImageLoader[@format='split.viewerimgloader']")
    if outer_loader is None:
        raise ValueError(
            "split.viewerimgloader ImageLoader node not found in XML; "
            "ensure the XML contains an ImageLoader with format='split.viewerimgloader'."
        )

    inner_loader = outer_loader.find("ImageLoader")
    if inner_loader is None:
        raise ValueError(
            "Nested ImageLoader node not found inside split.viewerimgloader configuration."
        )

    zarr_elem = inner_loader.find("zarr")
    if zarr_elem is None or zarr_elem.text is None:
        raise ValueError(
            "<zarr> node with base path is missing from split.viewerimgloader configuration."
        )

    zarr_base_path = zarr_elem.text.strip()

    # Build lookup from source setup id to (timepoint, zgroup_path).
    # Skip zgroups without a 'setup' attribute so the lookup keys remain
    # sortable strings (sorted() below would otherwise raise on None).
    zgroup_lookup = {}
    for zg in inner_loader.findall(".//zgroups/zgroup"):
        setup = zg.get("setup")
        if setup is None:
            continue
        tp = zg.get("tp") or zg.get("timepoint")
        path = zg.get("path")
        zgroup_lookup[setup] = (tp, path)

    image_loader_data = []
    for sid in outer_loader.findall(".//SetupIds/SetupIdDefinition"):
        # Guard against malformed definitions: a missing child element would
        # otherwise surface as an opaque AttributeError on NoneType.
        children = {}
        for tag in ("NewId", "OldId", "min", "max"):
            elem = sid.find(tag)
            if elem is None or elem.text is None:
                element_string = ET.tostring(sid, encoding='unicode')
                raise ValueError(
                    f"SetupIdDefinition missing required <{tag}> element: {element_string}"
                )
            children[tag] = elem.text.strip()

        new_id = children["NewId"]
        old_id = children["OldId"]
        crop_min = children["min"]
        crop_max = children["max"]

        if old_id not in zgroup_lookup:
            raise ValueError(
                f"SetupIdDefinition refers to OldId {old_id!r} that is not present in the "
                f"inner loader's zgroups. Available setup ids: {sorted(zgroup_lookup.keys())}"
            )
        tp, zgroup_path = zgroup_lookup[old_id]

        # Attempt to extract the channel from the path, assuming filenames include '_ch_<number>'
        # (e.g. both '.zarr' and '.ome.zarr' variants). If this pattern is not present or is
        # formatted differently, we deliberately fall back to channel 0 as a default.
        # (`or ""` guards a zgroup whose 'path' attribute is absent.)
        channel_match = re.search(r'_ch_(\d+)', zgroup_path or "")
        if channel_match:
            channel = channel_match.group(1)
        else:
            # Default to channel 0 when channel information cannot be parsed from the path.
            channel = 0

        image_loader_data.append({
            "view_setup": new_id,
            "timepoint": tp,
            "series": 1,
            "channel": channel,
            "file_path": zgroup_path,
            "crop_min": crop_min,
            "crop_max": crop_max,
            "zarr_base_path": zarr_base_path,
        })

    return pd.DataFrame(image_loader_data)

def route_image_loader(self, root):
    """
    Directs the XML parsing process based on the image loader format specified in the XML.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML.

    Returns
    -------
    pd.DataFrame
        Output of the format-specific parser (split zarr, tiff filemap, or plain zarr).

    Raises
    ------
    ValueError
        If no <ImageLoader> element is present in the XML.
    """
    format_node = root.find(".//ImageLoader")
    # Guard BEFORE reading the attribute: a missing ImageLoader must raise a
    # clear ValueError, not an AttributeError on None.
    if format_node is None:
        raise ValueError("No <ImageLoader> element found in XML; cannot determine image loader format.")

    # Lower-case and default to "" so substring routing is case-insensitive
    # and tolerant of a missing 'format' attribute.
    format_type = (format_node.get("format") or "").lower()
    if "split" in format_type:
        return self.parse_image_loader_split_zarr(root)
    elif "filemap" in format_type:
        return self.parse_image_loader_tiff(root)
    else:
        return self.parse_image_loader_zarr(root)
Expand All @@ -96,7 +187,7 @@ def parse_view_setups(self, root):
"""
viewsetups_data = []

for vs in root.findall(".//ViewSetup"):
for vs in root.findall("./SequenceDescription/ViewSetups/ViewSetup"):
id_ = vs.find("id").text
# name = vs.find("name").text
name = vs.findtext("name")
Expand Down
4 changes: 3 additions & 1 deletion Rhapso/detection/advanced_refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ def filter(self):
lb = row['lower_bound']
ub = row['upper_bound']
if vid == view_id:
to_process_interval = (lb, ub)

ub_inclusive = (ub[0]+1, ub[1]+1, ub[2]+1)
to_process_interval = (lb, ub_inclusive)
ips_block = []
intensities_block = []

Expand Down
49 changes: 46 additions & 3 deletions Rhapso/detection/difference_of_gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@
"""

class DifferenceOfGaussian:
def __init__(self, min_intensity, max_intensity, sigma, threshold, median_filter, mip_map_downsample):
def __init__(self, min_intensity, max_intensity, sigma, threshold, median_filter, mip_map_downsample,
min_peak_intensity=None):
self.min_intensity = min_intensity
self.max_intensity = max_intensity
self.sigma = sigma
self.threshold = threshold
self.median_filter = median_filter
self.mip_map_downsample = mip_map_downsample
# Optional post-detection filter: drop peaks whose RAW (pre-background-
# subtraction) image intensity at the localized peak is below this
# threshold. None disables. Targets zero-padded borders in stitched
# / fused volumes that DoG latches onto as edge features.
self.min_peak_intensity = min_peak_intensity

def apply_offset(self, peaks, offset_z):
"""
Expand Down Expand Up @@ -305,19 +311,56 @@ def run(self, image_chunk, offset, lb):
"""
Executes the entry point of the script.
"""
# Keep a reference to the RAW chunk before background subtraction so
# the optional min_peak_intensity filter can sample true voxel
# values (zero-padded borders read 0, not the post-subtract residual).
raw_image_chunk = image_chunk
image_chunk = self.background_subtract_xy(image_chunk)
peaks = self.compute_difference_of_gaussian(image_chunk)
print(f"[DoG] image_chunk.shape={image_chunk.shape}, detected_peaks={len(peaks)}, offset={offset}, lb={lb}")

if peaks.size == 0:
intensities = np.empty((0,), dtype=image_chunk.dtype)
final_peaks = peaks

else:
intensities = map_coordinates(image_chunk, peaks.T, order=1, mode='reflect')
final_peaks = self.apply_lower_bounds(peaks, lb)
if self.min_peak_intensity is not None:
# Reject peaks generated by a nearby data→0 step (the
# zero-border artifact in fused/stitched volumes). The DoG
# response peak from an edge step lands ~sigma INSIDE the
# bright side, where the raw value at the peak itself is
# positive — sampling only at the peak coord is therefore
# not enough. Check a (2r+1)^3 neighborhood and reject if
# the local minimum is below threshold (i.e. a zero voxel
# is in the support window). r = ceil(sigma) is enough to
# reach the step that generated the response.
r = max(1, int(np.ceil(float(self.sigma))))
peaks_int = np.rint(peaks).astype(np.int32)
shape = np.asarray(raw_image_chunk.shape, dtype=np.int32)
lo = np.clip(peaks_int - r, 0, shape - 1)
hi = np.clip(peaks_int + r + 1, 1, shape)
keep = np.ones(len(peaks_int), dtype=bool)
thr = float(self.min_peak_intensity)
for i in range(len(peaks_int)):
z0, y0, x0 = lo[i]
z1, y1, x1 = hi[i]
if raw_image_chunk[z0:z1, y0:y1, x0:x1].min() < thr:
keep[i] = False
n_dropped = int((~keep).sum())
if n_dropped:
print(
f"[DoG] min_peak_intensity={self.min_peak_intensity} "
f"(neighborhood r={r}): dropped {n_dropped}/{len(peaks)} "
f"peaks adjacent to sub-threshold voxels"
)
peaks = peaks[keep]
intensities = intensities[keep]
final_peaks = self.upsample_coordinates(peaks)
final_peaks = self.apply_lower_bounds(final_peaks, lb)
final_peaks = self.apply_offset(final_peaks, offset)
final_peaks = self.upsample_coordinates(final_peaks)

print(f"[DoG] final_peaks after transforms={len(final_peaks)}")
return {
'interest_points': final_peaks,
'intensities': intensities
Expand Down
Loading