Source code for image_analysis_3D.featurization_utils.colocalization_utils

"""Colocalization feature extraction utilities for 3D image objects.

Computes per-object colocalization metrics (Pearson correlation, Manders
coefficients, overlap coefficient, K1/K2 coefficients) between pairs of
fluorescence channels using the Costes automatic thresholding method.
"""

from typing import Dict, Tuple

import numpy
import scipy.ndimage
import skimage
from image_analysis_3D.image_utils.image_utils import (
    crop_3D_image,
    new_crop_border,
    select_objects_from_label,
)



[docs]
def linear_costes_threshold_calculation(
    first_image: numpy.ndarray,
    second_image: numpy.ndarray,
    scale_max: int = 255,
    fast_costes: str = "Accurate",
) -> Tuple[float, float]:
    """
    Finds the Costes Automatic Threshold for colocalization using a linear algorithm.
    Candiate thresholds are gradually decreased until Pearson R falls below 0.
    If "Fast" mode is enabled the "steps" between tested thresholds will be increased
    when Pearson R is much greater than 0. The other mode is "Accurate" which
    will always step down by the same amount.

    Parameters
    ----------
    first_image : numpy.ndarray
        The first fluorescence image.
    second_image : numpy.ndarray
        The second fluorescence image.
    scale_max : int, optional
        The maximum value for the image scale, by default 255.
    fast_costes : str, optional
        The mode for the Costes threshold calculation, by default "Accurate".

    Returns
    -------
    Tuple[float, float]
        The calculated thresholds for the first and second images.
    """
    i_step = 1 / scale_max  # Step size for the threshold as a float
    non_zero = (first_image > 0) | (second_image > 0)
    xvar = numpy.var(first_image[non_zero], axis=0, ddof=1)
    yvar = numpy.var(second_image[non_zero], axis=0, ddof=1)

    xmean = numpy.mean(first_image[non_zero], axis=0)
    ymean = numpy.mean(second_image[non_zero], axis=0)

    z = first_image[non_zero] + second_image[non_zero]
    zvar = numpy.var(z, axis=0, ddof=1)

    covar = 0.5 * (zvar - (xvar + yvar))

    denom = 2 * covar
    num = (yvar - xvar) + numpy.sqrt(
        (yvar - xvar) * (yvar - xvar) + 4 * (covar * covar)
    )
    a = num / denom
    b = ymean - a * xmean

    # Start at 1 step above the maximum value
    img_max = max(first_image.max(), second_image.max())
    i = i_step * ((img_max // i_step) + 1)

    num_true = None
    first_image_max = first_image.max()
    second_image_max = second_image.max()

    # Initialise without a threshold
    costReg, _ = scipy.stats.pearsonr(first_image, second_image)
    thr_first_image_c = i
    thr_second_image_c = (a * i) + b
    while i > first_image_max and (a * i) + b > second_image_max:
        i -= i_step
    while i > i_step:
        thr_first_image_c = i
        thr_second_image_c = (a * i) + b
        combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c)
        try:
            # Only run pearsonr if the input has changed.
            if (positives := numpy.count_nonzero(combt)) != num_true:
                costReg, _ = scipy.stats.pearsonr(
                    first_image[combt], second_image[combt]
                )
                num_true = positives

            if costReg <= 0:
                break
            elif fast_costes == "Accurate" or i < i_step * 10:
                i -= i_step
            elif costReg > 0.45:
                # We're way off, step down 10x
                i -= i_step * 10
            elif costReg > 0.35:
                # Still far from 0, step 5x
                i -= i_step * 5
            elif costReg > 0.25:
                # Step 2x
                i -= i_step * 2
            else:
                i -= i_step
        except ValueError:
            break
    return thr_first_image_c, thr_second_image_c




[docs]
def bisection_costes_threshold_calculation(
    first_image: numpy.ndarray, second_image: numpy.ndarray, scale_max: int = 255
) -> tuple[float, float]:
    """
    Finds the Costes Automatic Threshold for colocalization using a bisection algorithm.
    Candidate thresholds are selected from within a window of possible intensities,
    this window is narrowed based on the R value of each tested candidate.
    We're looking for the first point at 0, and R value can become highly variable
    at lower thresholds in some samples. Therefore the candidate tested in each
    loop is 1/6th of the window size below the maximum value (as opposed to the midpoint).

    Parameters
    ----------
    first_image : numpy.ndarray
        The first fluorescence image.
    second_image : numpy.ndarray
        The second fluorescence image.
    scale_max : int, optional
        The maximum value for the image scale, by default 255.

    Returns
    -------
    Tuple[float, float]
        The calculated thresholds for the first and second images.
    """

    non_zero = (first_image > 0) | (second_image > 0)
    xvar = numpy.var(first_image[non_zero], axis=0, ddof=1)
    yvar = numpy.var(second_image[non_zero], axis=0, ddof=1)

    xmean = numpy.mean(first_image[non_zero], axis=0)
    ymean = numpy.mean(second_image[non_zero], axis=0)

    z = first_image[non_zero] + second_image[non_zero]
    zvar = numpy.var(z, axis=0, ddof=1)

    covar = 0.5 * (zvar - (xvar + yvar))

    denom = 2 * covar
    num = (yvar - xvar) + numpy.sqrt((yvar - xvar) * (yvar - xvar) + 4 * (covar**2))
    a = num / denom
    b = ymean - a * xmean

    # Initialise variables
    left = 1
    right = scale_max
    mid = ((right - left) // (6 / 5)) + left
    lastmid = 0
    # Marks the value with the last positive R value.
    valid = 1

    while lastmid != mid:
        thr_first_image_c = mid / scale_max
        thr_second_image_c = (a * thr_first_image_c) + b
        combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c)
        if numpy.count_nonzero(combt) <= 2:
            # Can't run meaningful pearson with only 2 values.
            left = mid - 1
        else:
            try:
                costReg, _ = scipy.stats.pearsonr(
                    first_image[combt], second_image[combt]
                )
                if costReg < 0:
                    left = mid - 1
                elif costReg >= 0:
                    right = mid + 1
                    valid = mid
            except ValueError:
                # Catch misc Pearson errors with low sample numbers
                left = mid - 1
        lastmid = mid
        if right - left > 6:
            mid = ((right - left) // (6 / 5)) + left
        else:
            mid = ((right - left) // 2) + left

    thr_first_image_c = (valid - 1) / scale_max
    thr_second_image_c = (a * thr_first_image_c) + b

    return thr_first_image_c, thr_second_image_c




[docs]
def prepare_two_images_for_colocalization(
    label_object1: numpy.ndarray,
    label_object2: numpy.ndarray,
    image_object1: numpy.ndarray,
    image_object2: numpy.ndarray,
    object_id1: int,
    object_id2: int,
) -> Tuple[numpy.ndarray, numpy.ndarray]:
    """
    This function prepares two images for colocalization analysis by cropping them to the bounding boxes of the specified objects.
    It selects the objects from the label images, calculates their bounding boxes, and crops the images accordingly.

    Parameters
    ----------
    label_object1 : numpy.ndarray
        The segmented label image for the first object.
    label_object2 : numpy.ndarray
        The segmented label image for the second object.
    image_object1 : numpy.ndarray
        The spectral image to crop for the first object.
    image_object2 : numpy.ndarray
        The spectral image to crop for the second object.
    object_id1 : int
        The object index to select from the label image for the first object.
    object_id2 : int
        The object index to select from the label image for the second object.

    Returns
    -------
    Tuple[numpy.ndarray, numpy.ndarray]
        The two cropped images for colocalization analysis.
    """
    label_object1 = select_objects_from_label(label_object1, object_id1)
    label_object2 = select_objects_from_label(label_object2, object_id2)
    # get the image bbox
    props_image1 = skimage.measure.regionprops_table(label_object1, properties=["bbox"])
    bbox_image1 = (
        props_image1["bbox-0"][0],  # z min
        props_image1["bbox-1"][0],  # y min
        props_image1["bbox-2"][0],  # x min
        props_image1["bbox-3"][0],  # z max
        props_image1["bbox-4"][0],  # y max
        props_image1["bbox-5"][0],  # x max
    )

    props_image2 = skimage.measure.regionprops_table(label_object2, properties=["bbox"])
    bbox_image2 = (
        props_image2["bbox-0"][0],  # z min
        props_image2["bbox-1"][0],  # y min
        props_image2["bbox-2"][0],  # x min
        props_image2["bbox-3"][0],  # z max
        props_image2["bbox-4"][0],  # y max
        props_image2["bbox-5"][0],  # x max
    )

    new_bbox1, new_bbox2 = new_crop_border(bbox_image1, bbox_image2, image_object1)

    cropped_image_1 = crop_3D_image(image_object1, new_bbox1)
    cropped_image_2 = crop_3D_image(image_object2, new_bbox2)
    return cropped_image_1, cropped_image_2




[docs]
def measure_3D_colocalization(
    cropped_image_1: numpy.ndarray,
    cropped_image_2: numpy.ndarray,
    thr: int = 15,
    fast_costes: str = "Accurate",
) -> Dict[str, float]:
    """
    This function calculates the colocalization coefficients between two images.
    It computes the correlation coefficient, Manders' coefficients, overlap coefficient,
    and Costes' coefficients. The results are returned as a dictionary.

    Parameters
    ----------
    cropped_image_1 : numpy.ndarray
        The first cropped image.
    cropped_image_2 : numpy.ndarray
        The second cropped image.
    thr : int, optional
        The threshold for the Manders' coefficients, by default 15
    fast_costes : str, optional
        The mode for Costes' threshold calculation, by default "Accurate".
        Options are "Accurate" or "Fast".
        "Accurate" uses a linear algorithm, while "Fast" uses a bisection algorithm.
        The "Fast" mode is faster but less accurate.

    Returns
    -------
    Dict[str, float]
        The output features for colocalization analysis.
    """
    results = {}
    ################################################################################################
    # Calculate the correlation coefficient between the two images
    # This is the Pearson correlation coefficient
    # Pearson correlation coeffecient = cov(X, Y) / (std(X) * std(Y))
    # where cov(X, Y) is the covariance of X and Y
    # where X and Y are the two images
    # std(X) is the standard deviation of X
    # std(Y) is the standard deviation of Y
    # cov(X, Y) = sum((X - mean(X)) * (Y - mean(Y))) / (N - 1)
    # std(X) = sqrt(sum((X - mean(X)) ** 2) / (N - 1))
    # thus N -1 cancels out in the calculation below
    ################################################################################################
    mean1 = numpy.mean(cropped_image_1)
    mean2 = numpy.mean(cropped_image_2)
    std1 = numpy.sqrt(numpy.sum((cropped_image_1 - mean1) ** 2))
    std2 = numpy.sqrt(numpy.sum((cropped_image_2 - mean2) ** 2))
    x = cropped_image_1 - mean1  # x is not the same as the x dimension here
    y = cropped_image_2 - mean2  # y is not the same as the y dimension here
    corr = numpy.sum(x * y) / (std1 * std2)

    ################################################################################################
    # Calculate the Manders' coefficients
    ################################################################################################

    # Threshold as percentage of maximum intensity of objects in each channel
    try:
        tff = (thr / 100) * numpy.max(cropped_image_1)
        tss = (thr / 100) * numpy.max(cropped_image_2)
        # Ensure thresholds are at least 1 to avoid zero thresholding
        # if an errors occurs this is probably due to empty images
        # or images where the bbox is incredibly small and inconsistent
        # or the bbox is on the border of the image
        # in which case we want to remove anyway
    except ValueError:
        M1, M2 = 0.0, 0.0
    else:
        # get the thresholds
        combined_thresh = (cropped_image_1 >= tff) & (cropped_image_2 >= tss)

        first_image_thresh = cropped_image_1[combined_thresh]
        second_image_thresh = cropped_image_2[combined_thresh]

        tot_first_image_thr = scipy.ndimage.sum(
            cropped_image_1[cropped_image_1 >= tff],
        )
        tot_second_image_thr = scipy.ndimage.sum(
            cropped_image_2[cropped_image_2 >= tss]
        )

        if tot_first_image_thr > 0 and tot_second_image_thr > 0:
            M1 = scipy.ndimage.sum(first_image_thresh) / tot_first_image_thr
            M2 = scipy.ndimage.sum(second_image_thresh) / tot_second_image_thr
        else:
            M1, M2 = 0.0, 0.0
    ################################################################################################
    # Calculate the overlap coefficient
    ################################################################################################

    fpsq = scipy.ndimage.sum(
        cropped_image_1[combined_thresh] ** 2,
    )
    spsq = scipy.ndimage.sum(
        cropped_image_2[combined_thresh] ** 2,
    )
    pdt = numpy.sqrt(numpy.array(fpsq) * numpy.array(spsq))
    overlap = (
        scipy.ndimage.sum(
            cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh],
        )
        / pdt
    )
    K1 = scipy.ndimage.sum(
        cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh],
    ) / (numpy.array(fpsq))
    K2 = scipy.ndimage.sum(
        cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh],
    ) / (numpy.array(spsq))

    # first_pixels, second_pixels = flattened image arrays
    # combined_thresh = boolean mask of pixels above threshold in both channels
    # fi_thresh, si_thresh = thresholded intensities (same shape as pixels)

    # --- Rank computation ---
    # Flatten images for ranking
    img1_flat = cropped_image_1.flatten()
    img2_flat = cropped_image_2.flatten()

    # --- Rank computation ---
    sorted_idx_1 = numpy.argsort(img1_flat)
    sorted_idx_2 = numpy.argsort(img2_flat)

    # Create rank arrays
    rank_1_flat = numpy.empty_like(sorted_idx_1, dtype=float)
    rank_2_flat = numpy.empty_like(sorted_idx_2, dtype=float)
    rank_1_flat[sorted_idx_1] = numpy.arange(len(sorted_idx_1))
    rank_2_flat[sorted_idx_2] = numpy.arange(len(sorted_idx_2))

    # Reshape back to original shape
    rank_im1 = rank_1_flat.reshape(cropped_image_1.shape)
    rank_im2 = rank_2_flat.reshape(cropped_image_2.shape)

    # --- Rank difference weight ---
    R = max(rank_im1.max(), rank_im2.max()) + 1
    Di = numpy.abs(rank_im1 - rank_im2)
    weight = (R - Di) / R

    # Get weights for thresholded pixels
    weight_thresh = weight[combined_thresh]

    # Get thresholded values (no double-thresholding!)
    first_image_thresh_final = first_image_thresh
    second_image_thresh_final = second_image_thresh

    # --- Calculate weighted colocalization ---
    if numpy.any(combined_thresh) and len(first_image_thresh_final) > 0:
        weighted_sum_1 = numpy.sum(first_image_thresh_final * weight_thresh)
        weighted_sum_2 = numpy.sum(second_image_thresh_final * weight_thresh)

        total_1 = numpy.sum(first_image_thresh_final)
        total_2 = numpy.sum(second_image_thresh_final)

        RWC1 = weighted_sum_1 / total_1 if total_1 > 0 else 0.0
        RWC2 = weighted_sum_2 / total_2 if total_2 > 0 else 0.0
    else:
        RWC1, RWC2 = 0.0, 0.0
    ################################################################################################
    # Calculate the Costes' coefficient
    ################################################################################################

    # Orthogonal Regression for Costes' automated threshold
    if numpy.max(cropped_image_1) > 255 or numpy.max(cropped_image_2) > 255:
        scale = 65535
    else:
        scale = 255

    if fast_costes == "Accurate":
        thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation(
            cropped_image_1, cropped_image_2, scale
        )
    else:
        thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation(
            cropped_image_1, cropped_image_2, scale, fast_costes
        )

    # Costes' thershold for entire image is applied to each object
    first_image_above_thr = cropped_image_1 > thr_first_image_c
    second_image_above_thr = cropped_image_2 > thr_second_image_c
    combined_thresh_c = first_image_above_thr & second_image_above_thr
    first_image_thresh_c = cropped_image_1[combined_thresh_c]
    second_image_thresh_c = cropped_image_2[combined_thresh_c]

    tot_first_image_thr_c = scipy.ndimage.sum(
        cropped_image_1[cropped_image_1 >= thr_first_image_c],
    )

    tot_second_image_thr_c = scipy.ndimage.sum(
        cropped_image_2[cropped_image_2 >= thr_second_image_c],
    )
    if tot_first_image_thr_c > 0 and tot_second_image_thr_c > 0:
        C1 = scipy.ndimage.sum(first_image_thresh_c) / tot_first_image_thr_c
        C2 = scipy.ndimage.sum(second_image_thresh_c) / tot_second_image_thr_c
    else:
        C1, C2 = 0.0, 0.0
    ################################################################################################
    # write the results to the output dictionary
    ################################################################################################

    results["Correlation"] = corr
    results["MandersCoeffM1"] = M1
    results["MandersCoeffM2"] = M2
    results["OverlapCoeff"] = overlap
    results["MandersCoeffCostesM1"] = C1
    results["MandersCoeffCostesM2"] = C2
    results["RankWeightedColocalizationCoeff1"] = RWC1
    results["RankWeightedColocalizationCoeff2"] = RWC2

    return results