Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 109 additions & 121 deletions tools/2d_feature_extraction/2d_feature_extraction.py
Original file line number Diff line number Diff line change
@@ -1,127 +1,115 @@
import argparse

import giatools.io
import giatools
import numpy as np
import pandas as pd
import skimage.feature
import scipy.ndimage as ndi
import skimage.measure
import skimage.morphology
import skimage.segmentation

# Fail early if an optional backend is not available
giatools.require_backend('omezarr')

if __name__ == '__main__':

parser = argparse.ArgumentParser(description='Extract image features')

# TODO create factory for boilerplate code
features = parser.add_argument_group('compute features')
features.add_argument('--all', dest='all_features', action='store_true')
features.add_argument('--label', dest='add_label', action='store_true')
features.add_argument('--patches', dest='add_roi_patches', action='store_true')
features.add_argument('--max_intensity', dest='max_intensity', action='store_true')
features.add_argument('--mean_intensity', dest='mean_intensity', action='store_true')
features.add_argument('--min_intensity', dest='min_intensity', action='store_true')
features.add_argument('--moments_hu', dest='moments_hu', action='store_true')
features.add_argument('--centroid', dest='centroid', action='store_true')
features.add_argument('--bbox', dest='bbox', action='store_true')
features.add_argument('--area', dest='area', action='store_true')
features.add_argument('--filled_area', dest='filled_area', action='store_true')
features.add_argument('--convex_area', dest='convex_area', action='store_true')
features.add_argument('--perimeter', dest='perimeter', action='store_true')
features.add_argument('--extent', dest='extent', action='store_true')
features.add_argument('--eccentricity', dest='eccentricity', action='store_true')
features.add_argument('--equivalent_diameter', dest='equivalent_diameter', action='store_true')
features.add_argument('--euler_number', dest='euler_number', action='store_true')
features.add_argument('--inertia_tensor_eigvals', dest='inertia_tensor_eigvals', action='store_true')
features.add_argument('--major_axis_length', dest='major_axis_length', action='store_true')
features.add_argument('--minor_axis_length', dest='minor_axis_length', action='store_true')
features.add_argument('--orientation', dest='orientation', action='store_true')
features.add_argument('--solidity', dest='solidity', action='store_true')
features.add_argument('--moments', dest='moments', action='store_true')
features.add_argument('--convexity', dest='convexity', action='store_true')

parser.add_argument('--label_file_binary', dest='label_file_binary', action='store_true')

parser.add_argument('--raw', dest='raw_file', type=argparse.FileType('r'),
help='Original input file', required=False)
parser.add_argument('label_file', type=argparse.FileType('r'),
help='Label input file')
parser.add_argument('output_file', type=argparse.FileType('w'),
help='Tabular output file')
args = parser.parse_args()

label_file_binary = args.label_file_binary
label_file = args.label_file.name
out_file = args.output_file.name
add_patch = args.add_roi_patches

raw_image = None
if args.raw_file is not None:
raw_image = giatools.io.imread(args.raw_file.name)

raw_label_image = giatools.io.imread(label_file)

df = pd.DataFrame()
if label_file_binary:
raw_label_image = skimage.measure.label(raw_label_image)
regions = skimage.measure.regionprops(raw_label_image, intensity_image=raw_image)

df['it'] = np.arange(len(regions))

if add_patch:
df['image'] = df['it'].map(lambda ait: regions[ait].image.astype(np.float).tolist())
df['intensity_image'] = df['it'].map(lambda ait: regions[ait].intensity_image.astype(np.float).tolist())

# TODO no matrix features, but split in own rows?
if args.add_label or args.all_features:
df['label'] = df['it'].map(lambda ait: regions[ait].label)

if raw_image is not None:
if args.max_intensity or args.all_features:
df['max_intensity'] = df['it'].map(lambda ait: regions[ait].max_intensity)
if args.mean_intensity or args.all_features:
df['mean_intensity'] = df['it'].map(lambda ait: regions[ait].mean_intensity)
if args.min_intensity or args.all_features:
df['min_intensity'] = df['it'].map(lambda ait: regions[ait].min_intensity)
if args.moments_hu or args.all_features:
df['moments_hu'] = df['it'].map(lambda ait: regions[ait].moments_hu)

if args.centroid or args.all_features:
df['centroid'] = df['it'].map(lambda ait: regions[ait].centroid)
if args.bbox or args.all_features:
df['bbox'] = df['it'].map(lambda ait: regions[ait].bbox)
if args.area or args.all_features:
df['area'] = df['it'].map(lambda ait: regions[ait].area)
if args.filled_area or args.all_features:
df['filled_area'] = df['it'].map(lambda ait: regions[ait].filled_area)
if args.convex_area or args.all_features:
df['convex_area'] = df['it'].map(lambda ait: regions[ait].convex_area)
if args.perimeter or args.all_features:
df['perimeter'] = df['it'].map(lambda ait: regions[ait].perimeter)
if args.extent or args.all_features:
df['extent'] = df['it'].map(lambda ait: regions[ait].extent)
if args.eccentricity or args.all_features:
df['eccentricity'] = df['it'].map(lambda ait: regions[ait].eccentricity)
if args.equivalent_diameter or args.all_features:
df['equivalent_diameter'] = df['it'].map(lambda ait: regions[ait].equivalent_diameter)
if args.euler_number or args.all_features:
df['euler_number'] = df['it'].map(lambda ait: regions[ait].euler_number)
if args.inertia_tensor_eigvals or args.all_features:
df['inertia_tensor_eigvals'] = df['it'].map(lambda ait: regions[ait].inertia_tensor_eigvals)
if args.major_axis_length or args.all_features:
df['major_axis_length'] = df['it'].map(lambda ait: regions[ait].major_axis_length)
if args.minor_axis_length or args.all_features:
df['minor_axis_length'] = df['it'].map(lambda ait: regions[ait].minor_axis_length)
if args.orientation or args.all_features:
df['orientation'] = df['it'].map(lambda ait: regions[ait].orientation)
if args.solidity or args.all_features:
df['solidity'] = df['it'].map(lambda ait: regions[ait].solidity)
if args.moments or args.all_features:
df['moments'] = df['it'].map(lambda ait: regions[ait].moments)
if args.convexity or args.all_features:
perimeter = df['it'].map(lambda ait: regions[ait].perimeter)
area = df['it'].map(lambda ait: regions[ait].area)
df['convexity'] = area / (perimeter * perimeter)

del df['it']
df.to_csv(out_file, sep='\t', lineterminator='\n', index=False)
def surface(labels: np.ndarray, label: int) -> int:
"""
Ad-hoc implementation for computation of the "perimeter" of an object in 3D (that is a surface).
"""
assert labels.ndim == 3 # sanity check

# Create 3-D structuring element with 4-connectivity
selem = np.zeros((3, 3, 3), bool)
for ijk in np.ndindex(*selem.shape):
if (np.array(ijk) == 1).sum() >= 2:
selem[*ijk] = True # noqa: E999
assert selem.sum() == 7 # sanity check

# Compute the area of the surface
cc = (labels == label)
cc_interior = ndi.binary_erosion(cc, selem)
surface = np.logical_xor(cc, cc_interior)
return surface.sum() # number of voxels on the surface of the object


def compute_if_dask(obj):
"""
Return the computed object or array if it is a Dask array or deferred computable Dask object.
"""
return obj.compute() if hasattr(obj, 'compute') else obj


if __name__ == '__main__':
tool = giatools.ToolBaseplate()
tool.add_input_image('labels')
tool.add_input_image('intensities', required=False)
tool.parser.add_argument('--output', type=str)
tool.parse_args()

# Validate the input image
try:
label_image = tool.args.input_images['labels']
if any(label_image.shape[label_image.axes.index(axis)] > 1 for axis in label_image.axes if axis not in 'ZYX'):
raise ValueError(f'This tool is not applicable to images with {label_image.original_axes} axes.')

# Extract the image features
for section in tool.run('ZYX'): # the validation code above guarantees that we will have only a single iteration
df = pd.DataFrame()

# Get the labels array and cast to `uint8` if it is `bool` (`skimage.measure.regionprops` refuses `bool` typed arrays)
labels_section_data = section['labels'].data.squeeze()
if np.issubdtype(labels_section_data.dtype, bool):
print('Convert labels from bool to uint8')
labels_section_data = labels_section_data.astype(np.uint8)

# Some features currently cannot be computed from Dask arrays
if any(
feature_name in tool.args.params['features'] for feature_name in (
'inertia_tensor_eigvals',
'axis_major_length',
'axis_minor_length',
'eccentricity',
'orientation',
'moments_hu',
)
):
labels_section_data = compute_if_dask(labels_section_data)

# Compute the image features
if 'intensities' in tool.args.input_images:
regions = skimage.measure.regionprops(labels_section_data, intensity_image=section['intensities'].data.squeeze())
else:
regions = skimage.measure.regionprops(labels_section_data, intensity_image=None)
df['it'] = np.arange(len(regions))
for feature_name in tool.args.params['features']:

# Add the object label
if feature_name == 'label':
df['label'] = df['it'].map(lambda ait: regions[ait].label)

# Add the object perimeter/surface
elif feature_name == 'perimeter' and labels_section_data.ndim == 3:
df['perimeter'] = df['it'].map(
lambda ait: surface(labels_section_data, regions[ait].label), # `skimage.measure.regionprops` cannot compute perimeters for 3-D data
)

# Skip features that are not available when processing 3-D images
elif feature_name in ('eccentricity', 'moments_hu', 'orientation') and labels_section_data.ndim == 3:
print(f'Skip feature that is not available for 3-D images: "{feature_name}"')

# Add another feature from `regions` that was computed via `skimage.measure.regionprops`
else:
try:
df[feature_name] = df['it'].map(lambda ait: getattr(regions[ait], feature_name))
except TypeError:
raise ValueError(f'Unknown feature: "{feature_name}"')

# Resolve any remaining Dask objects to the actual values (e.g., when processing Zarrs)
df = df.map(compute_if_dask)

# Convert lists/tuples/arrays to lists of plain Python numbers (e.g., float instead of np.float64)
df = df.map(
lambda obj: np.asarray(obj).tolist() if type(obj) in (list, tuple, np.ndarray) else obj,
)

del df['it']
df.to_csv(tool.args.raw_args.output, sep='\t', lineterminator='\n', index=False)

except ValueError as err:
exit(err.args[0])
Loading