# Source code for image_analysis_3D.featurization_utils.loading_classes

"""Data-loading classes for featurization workflows."""

from __future__ import annotations

import logging
import pathlib

import numpy
import skimage.io
import skimage.measure

# Configure the root logger at INFO level as a side effect of importing this
# module (``basicConfig`` does nothing if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)



# [docs]
class ImageSetLoader:
    """
    Load an image set consisting of raw z stack images and segmentation masks.

    A class to load an image set consisting of raw z stack images from multiple
    spectral channels and segmentation masks. The images are loaded into a
    dictionary, and various attributes and compartments are extracted from the
    images. The class also provides methods to retrieve images and their attributes.

    Parameters
    ----------
    image_set_path : pathlib.Path
        Path to the image set directory.
    mask_set_path : pathlib.Path
        Path to the mask set directory.
    anisotropy_spacing : tuple
        The anisotropy spacing of the images in format (z_spacing, y_spacing, x_spacing).
    channel_mapping : dict
        A dictionary mapping channel names to their corresponding image file names.
        Example: ``{'nuclei': 'nuclei_', 'cell': 'cell_', 'cytoplasm': 'cytoplasm_'}``

    Attributes
    ----------
    image_set_name : str
        The name of the image set.
    anisotropy_spacing : tuple
        The anisotropy spacing of the images.
    anisotropy_factor : float
        The anisotropy factor calculated from the spacing.
    image_set_dict : dict
        A dictionary containing the loaded images, with keys as channel names.
    unique_mask_objects : dict
        A dictionary containing unique object IDs for each mask in the image set.
    unique_compartment_objects : dict
        A dictionary containing unique object IDs for each compartment in the image set.
        A compartment is defined as a segmented region in the image (e.g., Cell,
        Cytoplasm, Nuclei, Organoid). The compartments are bounds for measurements.
    image_names : list
        A list of image names in the image set.
    compartments : list
        A list of compartment names in the image set.

    Methods
    -------
    retrieve_image_attributes()
        Retrieve unique object IDs for each mask in the image set.
    get_unique_objects_in_compartments()
        Retrieve unique object IDs for each compartment in the image set.
    get_image(key)
        Retrieve the image corresponding to the specified key.
    get_image_names()
        Retrieve the names of images in the image set.
    get_compartments()
        Retrieve the names of compartments in the image set.
    get_anisotropy()
        Retrieve the anisotropy factor.
    """

    def __init__(
        self,
        image_set_path: pathlib.Path,
        mask_set_path: pathlib.Path | None,
        anisotropy_spacing: tuple[float, float, float],
        channel_mapping: dict[str, str],
        image_set_name: str | None = None,
        mask_key_name: list[str] | None = None,
        raw_image_key_name: list[str] | None = None,
    ) -> None:
        """
        Initialize the ImageSetLoader with the path to the image set, spacing, and channel mapping.

        Parameters
        ----------
        image_set_path : pathlib.Path
            Path to the image set directory.
        anisotropy_spacing : tuple
            The anisotropy spacing of the images. In the format (z_spacing, y_spacing, x_spacing).
        channel_mapping : dict
            A dictionary mapping channel names to their corresponding image file names.
            Example: {'nuclei': 'nuclei_', 'cell': 'cell_', 'cytoplasm': 'cytoplasm_'}
        image_set_name : str | None
            Optional name for the image set.
            This is typuically the well_fov name, but can be set to None if not applicable.
        mask_key_name : list[str] | None
            Optional list of strings to identify mask files in the file names. If None, no specific string is used to identify mask files.
        raw_image_key_name : list[str] | None
            Optional list of strings to identify raw image files in the file names. If None, no specific string is used to identify raw image files.
        """
        if mask_key_name is None:
            mask_key_name = []
        if raw_image_key_name is None:
            raw_image_key_name = []
        self.anisotropy_spacing = anisotropy_spacing
        self.anisotropy_factor = self.anisotropy_spacing[0] / self.anisotropy_spacing[1]
        self.image_set_name = image_set_name
        if image_set_path is None:
            channel_files = []
        else:
            channel_files = sorted(image_set_path.glob("*"))
            channel_files = [
                f
                for f in channel_files
                if f.suffix in [".tif", ".tiff"]
                and any(key in f.name for key in raw_image_key_name)
            ]

        self.mask_set_path = mask_set_path

        mask_files = sorted(mask_set_path.glob("*")) if mask_set_path else []
        mask_files = [
            f
            for f in mask_files
            if f.suffix in [".tif", ".tiff"]
            and any(key in f.name for key in mask_key_name)
        ]

        # Load images into a dictionary
        self.image_set_dict = {}
        for f in channel_files:
            for key, value in channel_mapping.items():
                if value in f.name:
                    self.image_set_dict[key] = skimage.io.imread(f)
        for f in mask_files:
            for key, value in channel_mapping.items():
                if value in f.name:
                    self.image_set_dict[key] = skimage.io.imread(f)

        self.retrieve_image_attributes()
        self.get_compartments()
        self.get_image_names()
        self.get_unique_objects_in_compartments()


[docs]
    def retrieve_image_attributes(self) -> None:
        """
        This is also a quick and dirty way of loading two types of images:
            1. masks (multi-indexed segmentation masks)
            2. The spectral images to extract morphology features from

        My naming convention puts the work "mask" in the segmentation images this
        this is a way to differentiate each mask of each compartment
        apart from the spectral images.

        Future work should be to load the images in a more structured way
        that does not depend on the file naming convention.
        """
        self.unique_mask_objects = {}
        for key, value in self.image_set_dict.items():
            if "mask" in key:
                self.unique_mask_objects[key] = numpy.unique(value)



[docs]
    def get_unique_objects_in_compartments(self) -> None:
        """Populate unique object IDs per compartment."""
        self.unique_compartment_objects = {}
        if len(self.compartments) == 0:
            self.compartments = None
        for compartment in self.compartments:
            self.unique_compartment_objects[compartment] = numpy.unique(
                self.image_set_dict[compartment]
            )
            # remove the 0 label
            self.unique_compartment_objects[compartment] = [
                x for x in self.unique_compartment_objects[compartment] if x != 0
            ]



[docs]
    def get_image(self, key: str) -> numpy.ndarray:
        """Return an image array for a given key.

        Parameters
        ----------
        key : str
            Channel or mask key.

        Returns
        -------
        numpy.ndarray
            Image array for the requested key.
        """
        return self.image_set_dict[key]



[docs]
    def get_image_names(self) -> list[str]:
        """Populate image (non-compartment) names.

        Returns
        -------
        list[str]
            List of image names excluding compartment masks.
        """
        compartments = (
            self.compartments
            if self.compartments is not None and isinstance(self.compartments, list)
            else []
        )
        self.image_names = [
            x for x in self.image_set_dict.keys() if x not in compartments
        ]
        return self.image_names



[docs]
    def get_compartments(self) -> list[str]:
        """Populate compartment names from available keys.

        Returns
        -------
        list[str]
            List of compartment keys.
        """
        self.compartments = [
            x
            for x in self.image_set_dict.keys()
            if "Nuclei" in x or "Cell" in x or "Cytoplasm" in x or "Organoid" in x
        ]
        return self.compartments



[docs]
    def get_anisotropy(self) -> float:
        """Return the anisotropy factor for the image set.

        Returns
        -------
        float
            Ratio of z-spacing to y-spacing.
        """
        return self.anisotropy_spacing[0] / self.anisotropy_spacing[1]





# [docs]
class ObjectLoader:
    """
    A class to load objects from a labeled image and extract their properties.
    Where an object is defined as a segmented region in the image.
    This could be a cell, a nucleus, or any other compartment segmented.

    Parameters
    ----------
    image : numpy.ndarray
        The image from which to extract objects. Preferably a 3D image -> z, y, x
    label_image : numpy.ndarray
        The labeled image containing the segmented objects.
    channel_name : str
        The name of the channel from which the objects are extracted.
    compartment_name : str
        The name of the compartment from which the objects are extracted.

    Attributes
    ----------
    image : numpy.ndarray
        The image from which the objects are extracted.
    label_image : numpy.ndarray
        The labeled image containing the segmented objects.
    channel : str
        The name of the channel from which the objects are extracted.
    compartment : str
        The name of the compartment from which the objects are extracted.
    objects : numpy.ndarray
        The labeled image containing the segmented objects.
    object_ids : numpy.ndarray
        The unique object IDs for the segmented objects.

    Methods
    -------
    __init__(image, label_image, channel_name, compartment_name)
        Initializes the ObjectLoader with the image, label image, channel name, and compartment name.
    """


[docs]
    def __init__(
        self,
        image: numpy.ndarray | None,
        label_image: numpy.ndarray,
        channel_name: str | None,
        compartment_name: str,
    ) -> None:
        """Initialize object loader with image and labels.

        Parameters
        ----------
        image : numpy.ndarray
            Image array used for measurements.
        label_image : numpy.ndarray
            Labeled segmentation mask.
        channel_name : str | None
            Channel name for the image.
        compartment_name : str
            Compartment name for the labels.
        """
        self.image = image
        self.label_image = label_image
        self.channel = channel_name
        self.compartment = compartment_name
        # get the labeled image objects
        self.object_ids = numpy.unique(label_image)
        # drop the 0 label
        self.object_ids = [x for x in self.object_ids if x != 0]





# [docs]
class TwoObjectLoader:
    """
    A class to load two images and a label image for a specific compartment.
    This class is primarily used for loading images for two-channel analysis like co-localization.

    Parameters
    ----------
    image_set_loader : ImageSetLoader
        An instance of the ImageSetLoader class containing the image set.
    compartment : str
        The name of the compartment for which the label image is loaded.
    channel1 : str
        The name of the first channel to be loaded.
    channel2 : str
        The name of the second channel to be loaded.

    Attributes
    ----------
    image_set_loader : ImageSetLoader
        An instance of the ImageSetLoader class containing the image set.
    compartment : str
        The name of the compartment for which the label image is loaded.
    label_image : numpy.ndarray
        A copy of the labeled image for the specified compartment.
    image1 : numpy.ndarray
        A copy of the image corresponding to the first channel.
    image2 : numpy.ndarray
        A copy of the image corresponding to the second channel.
    object_ids : list
        The object IDs for the specified compartment, taken from
        ``image_set_loader.unique_compartment_objects`` (same object, not a copy).

    Methods
    -------
    __init__(image_set_loader, compartment, channel1, channel2)
        Initializes the TwoObjectLoader with the image set loader, compartment, and channel names.
    """

    def __init__(
        self,
        image_set_loader: ImageSetLoader,
        compartment: str,
        channel1: str,
        channel2: str,
    ) -> None:
        """Initialize a two-channel loader for a compartment.

        Parameters
        ----------
        image_set_loader : ImageSetLoader
            Image set loader containing images and masks.
        compartment : str
            Compartment name for the label image.
        channel1 : str
            First channel name to load.
        channel2 : str
            Second channel name to load.

        Raises
        ------
        KeyError
            If ``compartment``, ``channel1`` or ``channel2`` is not a key of the
            loader's dictionaries.
        """
        self.image_set_loader = image_set_loader
        self.compartment = compartment

        images = image_set_loader.image_set_dict
        # Copies keep later in-place edits from touching the loader's arrays.
        self.label_image = images[compartment].copy()
        self.image1 = images[channel1].copy()
        self.image2 = images[channel2].copy()
        self.object_ids = image_set_loader.unique_compartment_objects[compartment]