Source code for image_analysis_3D.file_utils.segmentation_decoupling

"""Utilities for decoupling and merging segmentation masks."""

import numpy as np
import pandas as pd
import skimage


[docs] def euclidian_2D_distance( coord_set_1: tuple[float, float], coord_set_2: tuple[float, float] ) -> float: """ This function calculates the euclidian distance between two sets of coordinates (2D) sqrt((x1 - x2)^2 + (y1 - y2)^2) Parameters ---------- coord_set_1 : tuple The first set of coordinates (x, y) coord_set_2 : tuple The second set of coordinates (x, y) Returns ------- float The euclidian distance between the two sets of coordinates """ return np.sqrt( (coord_set_1[0] - coord_set_2[0]) ** 2 + (coord_set_1[1] - coord_set_2[1]) ** 2 )
[docs] def check_coordinate_inside_box( coord: tuple[float, float], box: tuple[float, float, float, float], ) -> bool: """ This function checks if a coordinate is inside a box Parameters ---------- coord : tuple The coordinate to check (y, x) box : tuple The box to check against [y_min, x_min, y_max, x_max] Returns ------- bool True if the coordinate is inside the box, False otherwise """ # check if coords and box are valid if not isinstance(coord, tuple): raise TypeError("coord must be a tuple") if not isinstance(box, tuple): raise TypeError("box must be a list") if not len(box) == 4: raise ValueError("box must be a list of length 4") if not len(coord) == 2: raise ValueError("coord must be a tuple of length 2") y_coord = coord[0] x_coord = coord[1] y_min = box[0] x_min = box[1] y_max = box[2] x_max = box[3] if x_coord >= x_min and x_coord <= x_max and y_coord >= y_min and y_coord <= y_max: return True else: return False
[docs] def get_larger_bbox( bbox1: tuple[float, float, float, float], bbox2: tuple[float, float, float, float], ) -> tuple[float, float, float, float]: """ This function returns the larger of two bounding boxes Parameters ---------- bbox1 : tuple The first bounding box [y_min, x_min, y_max, x_max] bbox2 : tuple The second bounding box [y_min, x_min, y_max, x_max] Returns ------- tuple A tuple of the larger bounding box [y_min, x_min, y_max, x_max] """ # check if boxes are valid if not isinstance(bbox1, tuple): raise TypeError("bbox1 must be a tuple") if not isinstance(bbox2, tuple): raise TypeError("bbox2 must be a tuple") if not len(bbox1) == 4: raise ValueError("bbox1 must be a list of length 4") if not len(bbox2) == 4: raise ValueError("bbox2 must be a list of length 4") bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) if bbox1_area >= bbox2_area: return bbox1 elif bbox2_area >= bbox1_area: return bbox2
[docs] def extract_unique_masks(image_stack: np.ndarray) -> pd.DataFrame: """ This function extracts unique masks from an image stack Parameters ---------- image_stack : np.ndarray The image stack to extract unique masks from Returns ------- pd.DataFrame The dataframe containing the unique masks """ mask_indices_dict = { "unique_mask": [], "mask_indices": [], "pseudo_slice": [], "y_x_coords": [], "bbox": [], "merged/lone": [], } # find each unqiue mask identity via pixel value unique_masks = np.unique(image_stack) # loop through each unique mask identity # drop the backgound as a mask identity unique_masks = unique_masks[unique_masks != 0] for unique_mask in unique_masks: # loop through each mask image for pseudo_slice in range(len(image_stack)): # find where the unique mask identity is in the mask image tmp_image = image_stack[pseudo_slice] # get only the unique mask image_for_coordinates = np.array(tmp_image == unique_mask, dtype=np.uint8) mask_indices = np.where(tmp_image == unique_mask) # if the mask identity is in the mask image mask_indices_dict["unique_mask"].append(unique_mask) mask_indices_dict["mask_indices"].append(mask_indices) mask_indices_dict["pseudo_slice"].append(pseudo_slice) c = skimage.measure.regionprops(image_for_coordinates) for property in c: centroid = property.centroid bbox = property.bbox mask_indices_dict["y_x_coords"].append(centroid) mask_indices_dict["bbox"].append(bbox) if len(mask_indices_dict["y_x_coords"]) < len( mask_indices_dict["mask_indices"] ): mask_indices_dict["y_x_coords"].append(None) mask_indices_dict["bbox"].append(None) # check if the mask is merged or lone mask_indices_dict["merged/lone"].append(None) unique_masks_df = pd.DataFrame(mask_indices_dict) return unique_masks_df
[docs] def compare_masks_for_merged( df: pd.DataFrame, index1: int, index2: int, distance_threshold: int = 10, ) -> pd.DataFrame: """ This function compares masks for merging Parameters ---------- df : pd.DataFrame The dataframe containing the masks index1 : int Index 1 index2 : int Index 2 distance_threshold : int, optional The distance threshold, by default 10 Returns ------- pd.DataFrame The dataframe containing the masks for merging """ output_dict = { "unique_mask": [], "mask_indices": [], "pseudo_slice": [], "y_x_coords": [], "bbox": [], "merged/lone": [], "area": [], "index_comparison": [], } number_of_pseudo_slices = len(df["pseudo_slice"].unique()) # get the x_y coordinates and bbox of the other index current_xy = df["y_x_coords"][index1] current_bbox = df["bbox"][index1] other_xy = df["y_x_coords"][index2] other_bbox = df["bbox"][index2] if not number_of_pseudo_slices < 2: # if the x_y coordinates are not None if other_xy is not None and current_xy is not None: distance = euclidian_2D_distance(current_xy, other_xy) if distance < distance_threshold: # check which bounding box has the larger area if get_larger_bbox(current_bbox, other_bbox) == current_bbox: # check if the x_y coordinates are within the bbox of the other coordinates coordinate_inside_box = check_coordinate_inside_box( other_xy, current_bbox ) if coordinate_inside_box: output_dict["unique_mask"].append(index1) output_dict["mask_indices"].append(df["mask_indices"][index1]) elif get_larger_bbox(current_bbox, other_bbox) == other_bbox: coordinate_inside_box = check_coordinate_inside_box( current_xy, other_bbox ) if coordinate_inside_box: output_dict["unique_mask"].append(index2) output_dict["mask_indices"].append(df["mask_indices"][index2]) else: coordinate_inside_box = False # check if the x_y coordinates are within the bbox of the other coordinates if coordinate_inside_box: # add the indices to the output dict output_dict["pseudo_slice"].append(df["pseudo_slice"][index1]) output_dict["y_x_coords"].append(other_xy) output_dict["bbox"].append(other_bbox) mask_array = df["mask_indices"][index2] # get the area of the mask mask_array = np.array(mask_array) area = mask_array.shape[0] * mask_array.shape[1] output_dict["area"].append(area) output_dict["index_comparison"].append(f"{index1},{index2}") output_dict["merged/lone"].append("merged") else: output_dict["unique_mask"].append(index1) output_dict["mask_indices"].append(df["mask_indices"][index1]) output_dict["pseudo_slice"].append(df["pseudo_slice"][index1]) output_dict["y_x_coords"].append(current_xy) output_dict["bbox"].append(current_bbox) output_dict["area"].append(1) output_dict["index_comparison"].append(f"{index1},{index2}") output_dict["merged/lone"].append("lone") new_df = pd.DataFrame(output_dict) return new_df
# get each combination of indices
[docs] def get_combinations_of_indices( df: pd.DataFrame, distance_threshold: int = 10 ) -> pd.DataFrame: """ This function gets the combinations of indices Parameters ---------- df : pd.DataFrame The dataframe containing the masks distance_threshold : int, optional The distance threshold, by default 10 Returns ------- pd.DataFrame The dataframe containing the combinations of indices """ original_shape = df.shape[0] if original_shape < 2: return df list_of_dfs = [] for index1 in range(len(df)): for index2 in range(1 + index1, len(df)): new_df = compare_masks_for_merged( df, index1, index2, distance_threshold=distance_threshold ) list_of_dfs.append(new_df) dfs = pd.concat(list_of_dfs).reset_index(drop=True) # get only the merged masks if dfs["pseudo_slice"].nunique() < 2: merged_df = dfs elif not dfs.shape[0] < 2: merged_df = dfs[dfs["merged/lone"] == "merged"] else: merged_df = dfs if merged_df.shape[0] < 2: return merged_df merged_df = merged_df.assign( index1=merged_df["index_comparison"].str.split(",", expand=True)[0].astype(int), index2=merged_df["index_comparison"].str.split(",", expand=True)[1].astype(int), ) merged_df.reset_index(drop=True, inplace=True) return merged_df
[docs] def merge_sets(list_of_sets: list[set[int]]) -> tuple[list[set[int]], int]: """Merge overlapping sets in-place and count merges. Parameters ---------- list_of_sets : list[set[int]] Sets of integer labels to merge. Returns ------- tuple[list[set[int]], int] Updated list of sets and the number of merges performed. """ counter = 0 for i, set1 in enumerate(list_of_sets): for j, set2 in enumerate(list_of_sets): if i != j and len(set1.intersection(set2)) > 0: set1.update(set2) list_of_sets.remove(set2) counter += 1 return list_of_sets, counter
# if 0 merges with 1 and 0 merges with 2, then 1 and 2 are merged
[docs] def merge_sets_df(merged_df: pd.DataFrame) -> pd.DataFrame: """ This function merges the sets of masks Parameters ---------- merged_df : pd.DataFrame The dataframe containing the masks Returns ------- pd.DataFrame The dataframe containing the merged masks """ if merged_df.shape[0] < 2: merged_df["label"] = 1 return merged_df index_sets = merged_df["index_comparison"] # convert to list of sets list_of_sets = [set(map(int, x.split(","))) for x in index_sets] counter = 1 while counter > 0: list_of_sets, counter = merge_sets(list_of_sets) merged_sets_dict = {} for i in range(len(list_of_sets)): merged_sets_dict[i] = list_of_sets[i] for row in merged_df.iterrows(): for num_set in merged_sets_dict: if int(row[1]["index1"]) in merged_sets_dict[num_set]: merged_df.at[row[0], "label"] = num_set + 1 list_of_dfs = [] # if nan values are present in the label column, then set to 0 merged_df["label"] = merged_df["label"].fillna(0) if len(merged_df["label"].unique()) < 1: merged_df["label"] = 0 else: for unique_label in merged_df["label"].unique(): tmp_df = merged_df[merged_df["label"] == unique_label] # keep one row that has the largest area largest_area_index = tmp_df["area"].idxmax() tmp_df = tmp_df.loc[largest_area_index] list_of_dfs.append(tmp_df) merged_df = pd.DataFrame(list_of_dfs) return merged_df
[docs] def reassemble_each_mask( df: pd.DataFrame, original_img_shape: tuple[int, int, int] ) -> np.ndarray: """ This function reassembles the masks from the dataframe Parameters ---------- df : pd.DataFrame The dataframe containing the masks original_img_shape : tuple The shape of the original image Returns ------- np.ndarray The reassembled masks """ # get the dimensions of the original image # make an empty array to hold the reassembled masks dict_of_masks = {} # get all unique group indices label = np.unique(df["label"]) for index in label: tmp_df = df.loc[df["label"] == index] dict_of_masks[index] = tmp_df["mask_indices"].values reassembled_masks = np.zeros( (original_img_shape[1], original_img_shape[2]), dtype=np.uint8 ) for index, mask in dict_of_masks.items(): # set the pixels in the reassembled masks to the index value for m in mask: reassembled_masks[m] = index + 1 # add 1 such that none will equal zero return reassembled_masks
[docs] def get_dimensionality(image_array: np.ndarray) -> int: """ This function returns the dimensionality of an image array while checking if the input is a numpy array Parameters ---------- image_array : np.ndarray The image array to check the dimensionality of Returns ------- int The dimensionality of the image array Raises ------ TypeError If the input is not a numpy array """ if not isinstance(image_array, np.ndarray): raise TypeError("image_array must be a numpy array") return len(image_array.shape)
[docs] def get_number_of_unique_labels(image_array: np.ndarray) -> int: """ This function returns the number of unique labels in an image array Parameters ---------- image_array : np.ndarray The image array to check the number of unique labels Returns ------- int The number of unique labels in the image array """ get_dimensionality(image_array) return len(np.unique(image_array))