Source code for image_analysis_3D.featurization_utils.colocalization_utils

"""Colocalization feature extraction utilities for 3D image objects.

Computes per-object colocalization metrics (Pearson correlation, Manders
coefficients, overlap coefficient, K1/K2 coefficients) between pairs of
fluorescence channels using the Costes automatic thresholding method.
"""

from typing import Dict, Tuple

import numpy
import scipy.ndimage
import skimage
from image_analysis_3D.image_utils.image_utils import (
    crop_3D_image,
    new_crop_border,
    select_objects_from_label,
)


[docs] def linear_costes_threshold_calculation( first_image: numpy.ndarray, second_image: numpy.ndarray, scale_max: int = 255, fast_costes: str = "Accurate", ) -> Tuple[float, float]: """ Finds the Costes Automatic Threshold for colocalization using a linear algorithm. Candiate thresholds are gradually decreased until Pearson R falls below 0. If "Fast" mode is enabled the "steps" between tested thresholds will be increased when Pearson R is much greater than 0. The other mode is "Accurate" which will always step down by the same amount. Parameters ---------- first_image : numpy.ndarray The first fluorescence image. second_image : numpy.ndarray The second fluorescence image. scale_max : int, optional The maximum value for the image scale, by default 255. fast_costes : str, optional The mode for the Costes threshold calculation, by default "Accurate". Returns ------- Tuple[float, float] The calculated thresholds for the first and second images. """ i_step = 1 / scale_max # Step size for the threshold as a float non_zero = (first_image > 0) | (second_image > 0) xvar = numpy.var(first_image[non_zero], axis=0, ddof=1) yvar = numpy.var(second_image[non_zero], axis=0, ddof=1) xmean = numpy.mean(first_image[non_zero], axis=0) ymean = numpy.mean(second_image[non_zero], axis=0) z = first_image[non_zero] + second_image[non_zero] zvar = numpy.var(z, axis=0, ddof=1) covar = 0.5 * (zvar - (xvar + yvar)) denom = 2 * covar num = (yvar - xvar) + numpy.sqrt( (yvar - xvar) * (yvar - xvar) + 4 * (covar * covar) ) a = num / denom b = ymean - a * xmean # Start at 1 step above the maximum value img_max = max(first_image.max(), second_image.max()) i = i_step * ((img_max // i_step) + 1) num_true = None first_image_max = first_image.max() second_image_max = second_image.max() # Initialise without a threshold costReg, _ = scipy.stats.pearsonr(first_image, second_image) thr_first_image_c = i thr_second_image_c = (a * i) + b while i > first_image_max and (a * i) + b > second_image_max: i -= i_step while i > i_step: thr_first_image_c = i thr_second_image_c = (a * i) + b combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c) try: # Only run pearsonr if the input has changed. if (positives := numpy.count_nonzero(combt)) != num_true: costReg, _ = scipy.stats.pearsonr( first_image[combt], second_image[combt] ) num_true = positives if costReg <= 0: break elif fast_costes == "Accurate" or i < i_step * 10: i -= i_step elif costReg > 0.45: # We're way off, step down 10x i -= i_step * 10 elif costReg > 0.35: # Still far from 0, step 5x i -= i_step * 5 elif costReg > 0.25: # Step 2x i -= i_step * 2 else: i -= i_step except ValueError: break return thr_first_image_c, thr_second_image_c
[docs] def bisection_costes_threshold_calculation( first_image: numpy.ndarray, second_image: numpy.ndarray, scale_max: int = 255 ) -> tuple[float, float]: """ Finds the Costes Automatic Threshold for colocalization using a bisection algorithm. Candidate thresholds are selected from within a window of possible intensities, this window is narrowed based on the R value of each tested candidate. We're looking for the first point at 0, and R value can become highly variable at lower thresholds in some samples. Therefore the candidate tested in each loop is 1/6th of the window size below the maximum value (as opposed to the midpoint). Parameters ---------- first_image : numpy.ndarray The first fluorescence image. second_image : numpy.ndarray The second fluorescence image. scale_max : int, optional The maximum value for the image scale, by default 255. Returns ------- Tuple[float, float] The calculated thresholds for the first and second images. """ non_zero = (first_image > 0) | (second_image > 0) xvar = numpy.var(first_image[non_zero], axis=0, ddof=1) yvar = numpy.var(second_image[non_zero], axis=0, ddof=1) xmean = numpy.mean(first_image[non_zero], axis=0) ymean = numpy.mean(second_image[non_zero], axis=0) z = first_image[non_zero] + second_image[non_zero] zvar = numpy.var(z, axis=0, ddof=1) covar = 0.5 * (zvar - (xvar + yvar)) denom = 2 * covar num = (yvar - xvar) + numpy.sqrt((yvar - xvar) * (yvar - xvar) + 4 * (covar**2)) a = num / denom b = ymean - a * xmean # Initialise variables left = 1 right = scale_max mid = ((right - left) // (6 / 5)) + left lastmid = 0 # Marks the value with the last positive R value. valid = 1 while lastmid != mid: thr_first_image_c = mid / scale_max thr_second_image_c = (a * thr_first_image_c) + b combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c) if numpy.count_nonzero(combt) <= 2: # Can't run meaningful pearson with only 2 values. left = mid - 1 else: try: costReg, _ = scipy.stats.pearsonr( first_image[combt], second_image[combt] ) if costReg < 0: left = mid - 1 elif costReg >= 0: right = mid + 1 valid = mid except ValueError: # Catch misc Pearson errors with low sample numbers left = mid - 1 lastmid = mid if right - left > 6: mid = ((right - left) // (6 / 5)) + left else: mid = ((right - left) // 2) + left thr_first_image_c = (valid - 1) / scale_max thr_second_image_c = (a * thr_first_image_c) + b return thr_first_image_c, thr_second_image_c
[docs] def prepare_two_images_for_colocalization( label_object1: numpy.ndarray, label_object2: numpy.ndarray, image_object1: numpy.ndarray, image_object2: numpy.ndarray, object_id1: int, object_id2: int, ) -> Tuple[numpy.ndarray, numpy.ndarray]: """ This function prepares two images for colocalization analysis by cropping them to the bounding boxes of the specified objects. It selects the objects from the label images, calculates their bounding boxes, and crops the images accordingly. Parameters ---------- label_object1 : numpy.ndarray The segmented label image for the first object. label_object2 : numpy.ndarray The segmented label image for the second object. image_object1 : numpy.ndarray The spectral image to crop for the first object. image_object2 : numpy.ndarray The spectral image to crop for the second object. object_id1 : int The object index to select from the label image for the first object. object_id2 : int The object index to select from the label image for the second object. Returns ------- Tuple[numpy.ndarray, numpy.ndarray] The two cropped images for colocalization analysis. """ label_object1 = select_objects_from_label(label_object1, object_id1) label_object2 = select_objects_from_label(label_object2, object_id2) # get the image bbox props_image1 = skimage.measure.regionprops_table(label_object1, properties=["bbox"]) bbox_image1 = ( props_image1["bbox-0"][0], # z min props_image1["bbox-1"][0], # y min props_image1["bbox-2"][0], # x min props_image1["bbox-3"][0], # z max props_image1["bbox-4"][0], # y max props_image1["bbox-5"][0], # x max ) props_image2 = skimage.measure.regionprops_table(label_object2, properties=["bbox"]) bbox_image2 = ( props_image2["bbox-0"][0], # z min props_image2["bbox-1"][0], # y min props_image2["bbox-2"][0], # x min props_image2["bbox-3"][0], # z max props_image2["bbox-4"][0], # y max props_image2["bbox-5"][0], # x max ) new_bbox1, new_bbox2 = new_crop_border(bbox_image1, bbox_image2, image_object1) cropped_image_1 = crop_3D_image(image_object1, new_bbox1) cropped_image_2 = crop_3D_image(image_object2, new_bbox2) return cropped_image_1, cropped_image_2
[docs] def measure_3D_colocalization( cropped_image_1: numpy.ndarray, cropped_image_2: numpy.ndarray, thr: int = 15, fast_costes: str = "Accurate", ) -> Dict[str, float]: """ This function calculates the colocalization coefficients between two images. It computes the correlation coefficient, Manders' coefficients, overlap coefficient, and Costes' coefficients. The results are returned as a dictionary. Parameters ---------- cropped_image_1 : numpy.ndarray The first cropped image. cropped_image_2 : numpy.ndarray The second cropped image. thr : int, optional The threshold for the Manders' coefficients, by default 15 fast_costes : str, optional The mode for Costes' threshold calculation, by default "Accurate". Options are "Accurate" or "Fast". "Accurate" uses a linear algorithm, while "Fast" uses a bisection algorithm. The "Fast" mode is faster but less accurate. Returns ------- Dict[str, float] The output features for colocalization analysis. """ results = {} ################################################################################################ # Calculate the correlation coefficient between the two images # This is the Pearson correlation coefficient # Pearson correlation coeffecient = cov(X, Y) / (std(X) * std(Y)) # where cov(X, Y) is the covariance of X and Y # where X and Y are the two images # std(X) is the standard deviation of X # std(Y) is the standard deviation of Y # cov(X, Y) = sum((X - mean(X)) * (Y - mean(Y))) / (N - 1) # std(X) = sqrt(sum((X - mean(X)) ** 2) / (N - 1)) # thus N -1 cancels out in the calculation below ################################################################################################ mean1 = numpy.mean(cropped_image_1) mean2 = numpy.mean(cropped_image_2) std1 = numpy.sqrt(numpy.sum((cropped_image_1 - mean1) ** 2)) std2 = numpy.sqrt(numpy.sum((cropped_image_2 - mean2) ** 2)) x = cropped_image_1 - mean1 # x is not the same as the x dimension here y = cropped_image_2 - mean2 # y is not the same as the y dimension here corr = numpy.sum(x * y) / (std1 * std2) ################################################################################################ # Calculate the Manders' coefficients ################################################################################################ # Threshold as percentage of maximum intensity of objects in each channel try: tff = (thr / 100) * numpy.max(cropped_image_1) tss = (thr / 100) * numpy.max(cropped_image_2) # Ensure thresholds are at least 1 to avoid zero thresholding # if an errors occurs this is probably due to empty images # or images where the bbox is incredibly small and inconsistent # or the bbox is on the border of the image # in which case we want to remove anyway except ValueError: M1, M2 = 0.0, 0.0 else: # get the thresholds combined_thresh = (cropped_image_1 >= tff) & (cropped_image_2 >= tss) first_image_thresh = cropped_image_1[combined_thresh] second_image_thresh = cropped_image_2[combined_thresh] tot_first_image_thr = scipy.ndimage.sum( cropped_image_1[cropped_image_1 >= tff], ) tot_second_image_thr = scipy.ndimage.sum( cropped_image_2[cropped_image_2 >= tss] ) if tot_first_image_thr > 0 and tot_second_image_thr > 0: M1 = scipy.ndimage.sum(first_image_thresh) / tot_first_image_thr M2 = scipy.ndimage.sum(second_image_thresh) / tot_second_image_thr else: M1, M2 = 0.0, 0.0 ################################################################################################ # Calculate the overlap coefficient ################################################################################################ fpsq = scipy.ndimage.sum( cropped_image_1[combined_thresh] ** 2, ) spsq = scipy.ndimage.sum( cropped_image_2[combined_thresh] ** 2, ) pdt = numpy.sqrt(numpy.array(fpsq) * numpy.array(spsq)) overlap = ( scipy.ndimage.sum( cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], ) / pdt ) K1 = scipy.ndimage.sum( cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], ) / (numpy.array(fpsq)) K2 = scipy.ndimage.sum( cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], ) / (numpy.array(spsq)) # first_pixels, second_pixels = flattened image arrays # combined_thresh = boolean mask of pixels above threshold in both channels # fi_thresh, si_thresh = thresholded intensities (same shape as pixels) # --- Rank computation --- # Flatten images for ranking img1_flat = cropped_image_1.flatten() img2_flat = cropped_image_2.flatten() # --- Rank computation --- sorted_idx_1 = numpy.argsort(img1_flat) sorted_idx_2 = numpy.argsort(img2_flat) # Create rank arrays rank_1_flat = numpy.empty_like(sorted_idx_1, dtype=float) rank_2_flat = numpy.empty_like(sorted_idx_2, dtype=float) rank_1_flat[sorted_idx_1] = numpy.arange(len(sorted_idx_1)) rank_2_flat[sorted_idx_2] = numpy.arange(len(sorted_idx_2)) # Reshape back to original shape rank_im1 = rank_1_flat.reshape(cropped_image_1.shape) rank_im2 = rank_2_flat.reshape(cropped_image_2.shape) # --- Rank difference weight --- R = max(rank_im1.max(), rank_im2.max()) + 1 Di = numpy.abs(rank_im1 - rank_im2) weight = (R - Di) / R # Get weights for thresholded pixels weight_thresh = weight[combined_thresh] # Get thresholded values (no double-thresholding!) first_image_thresh_final = first_image_thresh second_image_thresh_final = second_image_thresh # --- Calculate weighted colocalization --- if numpy.any(combined_thresh) and len(first_image_thresh_final) > 0: weighted_sum_1 = numpy.sum(first_image_thresh_final * weight_thresh) weighted_sum_2 = numpy.sum(second_image_thresh_final * weight_thresh) total_1 = numpy.sum(first_image_thresh_final) total_2 = numpy.sum(second_image_thresh_final) RWC1 = weighted_sum_1 / total_1 if total_1 > 0 else 0.0 RWC2 = weighted_sum_2 / total_2 if total_2 > 0 else 0.0 else: RWC1, RWC2 = 0.0, 0.0 ################################################################################################ # Calculate the Costes' coefficient ################################################################################################ # Orthogonal Regression for Costes' automated threshold if numpy.max(cropped_image_1) > 255 or numpy.max(cropped_image_2) > 255: scale = 65535 else: scale = 255 if fast_costes == "Accurate": thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation( cropped_image_1, cropped_image_2, scale ) else: thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation( cropped_image_1, cropped_image_2, scale, fast_costes ) # Costes' thershold for entire image is applied to each object first_image_above_thr = cropped_image_1 > thr_first_image_c second_image_above_thr = cropped_image_2 > thr_second_image_c combined_thresh_c = first_image_above_thr & second_image_above_thr first_image_thresh_c = cropped_image_1[combined_thresh_c] second_image_thresh_c = cropped_image_2[combined_thresh_c] tot_first_image_thr_c = scipy.ndimage.sum( cropped_image_1[cropped_image_1 >= thr_first_image_c], ) tot_second_image_thr_c = scipy.ndimage.sum( cropped_image_2[cropped_image_2 >= thr_second_image_c], ) if tot_first_image_thr_c > 0 and tot_second_image_thr_c > 0: C1 = scipy.ndimage.sum(first_image_thresh_c) / tot_first_image_thr_c C2 = scipy.ndimage.sum(second_image_thresh_c) / tot_second_image_thr_c else: C1, C2 = 0.0, 0.0 ################################################################################################ # write the results to the output dictionary ################################################################################################ results["Correlation"] = corr results["MandersCoeffM1"] = M1 results["MandersCoeffM2"] = M2 results["OverlapCoeff"] = overlap results["MandersCoeffCostesM1"] = C1 results["MandersCoeffCostesM2"] = C2 results["RankWeightedColocalizationCoeff1"] = RWC1 results["RankWeightedColocalizationCoeff2"] = RWC2 return results