Source code for image_analysis_3D.featurization_utils.loading_classes

"""Data-loading classes for featurization workflows."""

from __future__ import annotations

import logging
import pathlib

import numpy
import skimage.io
import skimage.measure

# Configure the root logger at INFO level as a side effect of importing this module.
logging.basicConfig(level=logging.INFO)


class ImageSetLoader:
    """
    Load an image set consisting of raw z stack images and segmentation masks.

    A class to load an image set consisting of raw z stack images from
    multiple spectral channels and segmentation masks. The images are loaded
    into a dictionary, and various attributes and compartments are extracted
    from the images. The class also provides methods to retrieve images and
    their attributes.

    Parameters
    ----------
    image_set_path : pathlib.Path | None
        Path to the image set directory. If None, no raw images are loaded.
    mask_set_path : pathlib.Path | None
        Path to the mask set directory. If None, no masks are loaded.
    anisotropy_spacing : tuple
        The anisotropy spacing of the images in format
        (z_spacing, y_spacing, x_spacing).
    channel_mapping : dict
        A dictionary mapping channel names to the substring that identifies
        the corresponding image file name.
        Example: ``{'nuclei': 'nuclei_', 'cell': 'cell_', 'cytoplasm': 'cytoplasm_'}``
    image_set_name : str | None
        Optional name for the image set.
    mask_key_name : list[str] | None
        Optional substrings identifying mask files. None or empty means no
        name-based filtering.
    raw_image_key_name : list[str] | None
        Optional substrings identifying raw image files. None or empty means
        no name-based filtering.

    Attributes
    ----------
    image_set_name : str | None
        The name of the image set.
    anisotropy_spacing : tuple
        The anisotropy spacing of the images.
    anisotropy_factor : float
        Ratio of z-spacing to y-spacing.
    mask_set_path : pathlib.Path | None
        The mask directory passed at construction.
    image_set_dict : dict
        The loaded images, keyed by channel name.
    unique_mask_objects : dict
        Unique label values (``numpy.unique`` output) for every key that
        contains the substring ``"mask"``.
    unique_compartment_objects : dict
        Non-zero unique object IDs for each compartment. A compartment is a
        segmented region in the image (e.g., Cell, Cytoplasm, Nuclei,
        Organoid). The compartments are bounds for measurements.
    image_names : list
        Keys of ``image_set_dict`` that are not compartments.
    compartments : list | None
        Compartment keys; set to None by
        ``get_unique_objects_in_compartments`` when there are none.

    Methods
    -------
    retrieve_image_attributes()
        Collect unique label values for each mask key.
    get_unique_objects_in_compartments()
        Collect non-zero unique object IDs for each compartment.
    get_image(key)
        Retrieve the image corresponding to the specified key.
    get_image_names()
        Retrieve the names of non-compartment images in the image set.
    get_compartments()
        Retrieve the names of compartments in the image set.
    get_anisotropy()
        Retrieve the anisotropy factor.
    """

    # File suffixes accepted as image files.
    _TIFF_SUFFIXES = (".tif", ".tiff")
    # Substrings that mark an image key as a segmentation compartment.
    _COMPARTMENT_MARKERS = ("Nuclei", "Cell", "Cytoplasm", "Organoid")

    def __init__(
        self,
        image_set_path: pathlib.Path | None,
        mask_set_path: pathlib.Path | None,
        anisotropy_spacing: tuple[float, float, float],
        channel_mapping: dict[str, str],
        image_set_name: str | None = None,
        mask_key_name: list[str] | None = None,
        raw_image_key_name: list[str] | None = None,
    ) -> None:
        """
        Initialize the ImageSetLoader and load every matching image.

        Parameters
        ----------
        image_set_path : pathlib.Path | None
            Path to the image set directory. If None, no raw images are loaded.
        mask_set_path : pathlib.Path | None
            Path to the mask set directory. If None, no masks are loaded.
        anisotropy_spacing : tuple
            The anisotropy spacing of the images.
            In the format (z_spacing, y_spacing, x_spacing).
        channel_mapping : dict
            A dictionary mapping channel names to the substring that
            identifies the corresponding image file name.
            Example: {'nuclei': 'nuclei_', 'cell': 'cell_', 'cytoplasm': 'cytoplasm_'}
        image_set_name : str | None
            Optional name for the image set. This is typically the well_fov
            name, but can be set to None if not applicable.
        mask_key_name : list[str] | None
            Optional list of strings to identify mask files in the file
            names. If None (or empty), no specific string is used to identify
            mask files, i.e. every TIFF in ``mask_set_path`` is a candidate.
        raw_image_key_name : list[str] | None
            Optional list of strings to identify raw image files in the file
            names. If None (or empty), no specific string is used to identify
            raw image files, i.e. every TIFF in ``image_set_path`` is a
            candidate.
        """
        self.anisotropy_spacing = anisotropy_spacing
        self.anisotropy_factor = anisotropy_spacing[0] / anisotropy_spacing[1]
        self.image_set_name = image_set_name
        self.mask_set_path = mask_set_path

        channel_files = self._list_tiffs(image_set_path, raw_image_key_name)
        mask_files = self._list_tiffs(mask_set_path, mask_key_name)

        # Load images into a dictionary. Raw channels are read first and
        # masks second, so a mask wins if both resolve to the same key.
        self.image_set_dict = {}
        for path in [*channel_files, *mask_files]:
            for key, pattern in channel_mapping.items():
                if pattern in path.name:
                    self.image_set_dict[key] = skimage.io.imread(path)

        self.retrieve_image_attributes()
        self.get_compartments()
        self.get_image_names()
        self.get_unique_objects_in_compartments()

    @classmethod
    def _list_tiffs(
        cls,
        directory: pathlib.Path | None,
        name_keys: list[str] | None,
    ) -> list[pathlib.Path]:
        """Return the sorted TIFF files in *directory* matching *name_keys*."""
        if directory is None:
            return []
        return [
            path
            for path in sorted(directory.glob("*"))
            if path.suffix in cls._TIFF_SUFFIXES
            # An empty/None key list means "do not filter by name"; a bare
            # any() over an empty list would reject every file.
            and (not name_keys or any(key in path.name for key in name_keys))
        ]

    def retrieve_image_attributes(self) -> None:
        """
        Collect the unique label values of every mask image.

        This is also a quick and dirty way of telling apart two types of
        images:

        1. masks (multi-indexed segmentation masks)
        2. the spectral images to extract morphology features from

        The naming convention puts the word "mask" in the key of the
        segmentation images, which differentiates each compartment mask from
        the spectral images. Future work should load the images in a more
        structured way that does not depend on the naming convention.
        """
        self.unique_mask_objects = {
            key: numpy.unique(image)
            for key, image in self.image_set_dict.items()
            if "mask" in key
        }

    def get_unique_objects_in_compartments(self) -> None:
        """Populate the non-zero unique object IDs per compartment.

        When there are no compartments, ``self.compartments`` is set to None
        and ``self.unique_compartment_objects`` is left empty.
        """
        self.unique_compartment_objects = {}
        if not self.compartments:
            # Keep the None sentinel that get_image_names() already handles,
            # but return instead of iterating over it.
            self.compartments = None
            return
        for compartment in self.compartments:
            labels = numpy.unique(self.image_set_dict[compartment])
            # remove the 0 label
            self.unique_compartment_objects[compartment] = [
                label for label in labels if label != 0
            ]

    def get_image(self, key: str) -> numpy.ndarray:
        """Return an image array for a given key.

        Parameters
        ----------
        key : str
            Channel or mask key.

        Returns
        -------
        numpy.ndarray
            Image array for the requested key.

        Raises
        ------
        KeyError
            If *key* was not loaded.
        """
        return self.image_set_dict[key]

    def get_image_names(self) -> list[str]:
        """Populate image (non-compartment) names.

        Returns
        -------
        list[str]
            List of image names excluding compartment masks.
        """
        compartments = (
            self.compartments if isinstance(self.compartments, list) else []
        )
        self.image_names = [
            name for name in self.image_set_dict if name not in compartments
        ]
        return self.image_names

    def get_compartments(self) -> list[str]:
        """Populate compartment names from available keys.

        A key is a compartment when it contains "Nuclei", "Cell",
        "Cytoplasm" or "Organoid".

        Returns
        -------
        list[str]
            List of compartment keys.
        """
        self.compartments = [
            name
            for name in self.image_set_dict
            if any(marker in name for marker in self._COMPARTMENT_MARKERS)
        ]
        return self.compartments

    def get_anisotropy(self) -> float:
        """Return the anisotropy factor for the image set.

        Returns
        -------
        float
            Ratio of z-spacing to y-spacing.
        """
        return self.anisotropy_spacing[0] / self.anisotropy_spacing[1]
class ObjectLoader:
    """
    Pair an intensity image with a labeled image and list its objects.

    An object is a segmented region in the labeled image. This could be a
    cell, a nucleus, or any other segmented compartment.

    Parameters
    ----------
    image : numpy.ndarray | None
        The image from which to extract objects. Preferably a 3D image
        ordered z, y, x.
    label_image : numpy.ndarray
        The labeled image containing the segmented objects.
    channel_name : str | None
        The name of the channel from which the objects are extracted.
    compartment_name : str
        The name of the compartment from which the objects are extracted.

    Attributes
    ----------
    image : numpy.ndarray | None
        The image passed at construction.
    label_image : numpy.ndarray
        The labeled image passed at construction.
    channel : str | None
        The channel name passed at construction.
    compartment : str
        The compartment name passed at construction.
    object_ids : list
        The unique non-zero label values found in ``label_image``.
    """

    def __init__(
        self,
        image: numpy.ndarray | None,
        label_image: numpy.ndarray,
        channel_name: str | None,
        compartment_name: str,
    ) -> None:
        """Store the inputs and collect the non-zero object IDs.

        Parameters
        ----------
        image : numpy.ndarray | None
            Image array used for measurements.
        label_image : numpy.ndarray
            Labeled segmentation mask.
        channel_name : str | None
            Channel name for the image.
        compartment_name : str
            Compartment name for the labels.
        """
        self.image = image
        self.label_image = label_image
        self.channel = channel_name
        self.compartment = compartment_name

        # Every distinct label in the mask, with label 0 dropped.
        distinct_labels = numpy.unique(label_image)
        self.object_ids = list(distinct_labels[distinct_labels != 0])
class TwoObjectLoader:
    """
    Bundle two channel images with the label image of one compartment.

    This class is primarily used for loading images for two-channel analysis
    like co-localization.

    Parameters
    ----------
    image_set_loader : ImageSetLoader
        An instance of the ImageSetLoader class containing the image set.
    compartment : str
        The name of the compartment for which the label image is loaded.
    channel1 : str
        The name of the first channel to be loaded.
    channel2 : str
        The name of the second channel to be loaded.

    Attributes
    ----------
    image_set_loader : ImageSetLoader
        The loader passed at construction.
    compartment : str
        The name of the compartment for which the label image is loaded.
    label_image : numpy.ndarray
        A copy of the labeled image for the specified compartment.
    image1 : numpy.ndarray
        A copy of the image for the first channel.
    image2 : numpy.ndarray
        A copy of the image for the second channel.
    object_ids : list
        The loader's ``unique_compartment_objects`` entry for the
        specified compartment.
    """

    def __init__(
        self,
        image_set_loader: ImageSetLoader,
        compartment: str,
        channel1: str,
        channel2: str,
    ) -> None:
        """Copy the requested images out of the image set loader.

        Parameters
        ----------
        image_set_loader : ImageSetLoader
            Image set loader containing images and masks.
        compartment : str
            Compartment name for the label image.
        channel1 : str
            First channel name to load.
        channel2 : str
            Second channel name to load.
        """
        self.image_set_loader = image_set_loader
        self.compartment = compartment

        # Copies keep later edits away from the loader's arrays.
        loaded_images = self.image_set_loader.image_set_dict
        self.label_image = loaded_images[compartment].copy()
        self.image1 = loaded_images[channel1].copy()
        self.image2 = loaded_images[channel2].copy()

        ids_by_compartment = image_set_loader.unique_compartment_objects
        self.object_ids = ids_by_compartment[compartment]