From 6eb7c350f06e0ed09b6014365eb1f6de61f8714c Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Thu, 29 Apr 2021 12:36:43 +0200 Subject: [PATCH] added lot of code documentation --- CHANGES.md | 6 ++ README.md | 8 +-- sensospot_grid/__init__.py | 123 +++++++++++++++++++++++++++++++---- sensospot_grid/images.py | 93 ++++++++++++++++++++++++-- sensospot_grid/parameters.py | 69 ++++++++++++++++++-- 5 files changed, 270 insertions(+), 29 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2fd3f54..1c72ece 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +0.1.1 - it works! +------------------ + + - first working cli version + + 0.0.1 - first version ---------------------- diff --git a/README.md b/README.md index e7ed525..6e7659a 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ Creating nice spot images from scans ```python - import sensospot_images + import sensospot_grid - sensospot_images.run() + sensospot_grid.run() ``` @@ -17,10 +17,10 @@ Creating nice spot images from scans To install the development version of Sensospot Images: - git clone https://github.com/holgi/sensospot_images.git + git clone https://github.com/holgi/sensospot_grid.git # create a virtual environment and install all required dev dependencies - cd sensospot_images + cd sensospot_grid make devenv To run the tests, use `make tests` or `make coverage` for a complete report. 
diff --git a/sensospot_grid/__init__.py b/sensospot_grid/__init__.py index da2a7e4..d4732ba 100644 --- a/sensospot_grid/__init__.py +++ b/sensospot_grid/__init__.py @@ -1,9 +1,9 @@ """ Sensospot Images -Creating nice spot images from scans +Creating nice grid images from scans """ -__version__ = "0.0.1" +__version__ = "0.1.0" import sys from pathlib import Path @@ -24,12 +24,21 @@ from .parameters import get_spot_parameters, get_array_parameters def calulate_pixel_size(data_frame, array_definition): + """calculate the pixel size in micrometers + + data_frame: the numerical analysis from sensospot of one image + array_definition: named tuple with sensospot settings + + returns: pixel size in micrometers + """ + # get the first and last probe position in the data first = get_position(data_frame.iloc[0], actual=False) last = get_position(data_frame.iloc[-1], actual=False) x_dist_pixel = last.x - first.x y_dist_pixel = last.y - first.y + # calculate the distance between the first and last spot on x and y axis ad = array_definition x_dist_um = ad.dist_x * (ad.size_x - 1) y_dist_um = ad.dist_y * (ad.size_y - 1) @@ -49,6 +58,16 @@ def calulate_pixel_size(data_frame, array_definition): def get_example_data_path(input_dir): + """returns the path to one data file in the scan folder + + this will search an array image file first and use this as a template + filename for a csv file. Some raw data folders contain csv files that + are unrelated to image files and should be avoided. + + input_dir: the raw data directory to search in + + returns: path to one csv file + """ input_path = Path(input_dir) tif_files = input_path.glob("*.tif") example_tif = next(tif_files) @@ -56,6 +75,16 @@ def get_example_data_path(input_dir): def get_filename_prefix(input_dir): + """returns the common file name prefix for tif and csv files + + The raw data in the scanfolder has the format: + `[prefix]_[well]_[exposure].[suffix]` + To be able to find all well and exposure files, the prefix is needed. 
+ + input_dir: the raw data directory to search in + + returns: the prefix for array analysis files + """ file_path = get_example_data_path(input_dir) example_name = file_path.stem prefix, well, exposure = example_name.rsplit("_", 2) @@ -63,6 +92,17 @@ def get_filename_prefix(input_dir): def retrieve_spot_parameters(input_dir, scale): + """returns the spot parameter settings of the analysis + + All setting parameters are stored in a special folder in the raw data + directory. This parses the files needed and recalculates the values + in micrometers to pixels. + + input_dir: the raw data directory to search in + scale: scale factor for the image + + returns: a named tuple with spot parameters + """ parameters_path = _search_measurement_params_file(input_dir) if parameters_path is None: sys.exit(f"Could not find parameter files in {input_dir}") @@ -79,6 +119,14 @@ def retrieve_spot_parameters(input_dir, scale): def search_image_files(input_dir, wells, exposures): + """searches for tif files in the raw data directory + + input_dir: the raw data directory to search in + wells: glob pattern for well matching + exposures: glob pattern for exposure id matching + + returns: iterator of tif file paths + """ input_path = Path(input_dir) prefix = get_filename_prefix(input_path) tmp_pattern = f"{prefix}_*{wells}*_*{exposures}.tif" @@ -87,10 +135,23 @@ def search_image_files(input_dir, wells, exposures): def create_file_map(input_dir, wells, exposures): + """creates a dictionary relating array data files to image files + + Only the first exposure of an exposure series is used for spot location, + therefore only the data of the exposure id 1 should be used for any + exposure of the well. + + input_dir: the raw data directory to search in + wells: glob pattern for well matching + exposures: glob pattern for exposure id matching + + returns: dict, data file paths as keys, list of image file paths as values + """ + file_map = {} for tif_path in search_image_files(input_dir, wells, 
exposures): - rest, exposure = tif_path.stem.rsplit("_", 1) - csv_path = tif_path.parent / f"{rest}_1.csv" + well, rest = tif_path.stem.rsplit("_", 1) + csv_path = tif_path.parent / f"{well}_1.csv" if csv_path.is_file(): if csv_path not in file_map: file_map[csv_path] = [] @@ -98,15 +159,32 @@ def create_file_map(input_dir, wells, exposures): return file_map -def process_image(image_file, spot_parameters, spot_data, scale): +def process_image(image_file, spot_parameters, array_data, scale): + """load and annotate image + + image_file: file path to array image + spot_parameters: settings of spot analysis + array_data: numerical data of analysed image + scale: scale factor for the image + + returns: annotated image + """ img = load_array_image(image_file, scale=scale) - annotate_image(img, spot_parameters, spot_data, scale) + annotate_image(img, spot_parameters, array_data, scale) return img def create_crops( output_path, img, image_path, spot_parameters, array_data, scale ): + """create cropped images for each spot + + output_path: directory to store the cropped images in + image_path: file path to array image + spot_parameters: settings of spot analysis + array_data: numerical data of analysed image + scale: scale factor for the image + """ base_name = image_path.stem for index, spot_data in array_data.iterrows(): cropped_img = crop(img, spot_parameters, spot_data, scale) @@ -120,8 +198,17 @@ def process( wells="*", exposures="*", scale=3, - add_single_spots=False, + crop_single_spots=False, ): + """process a complete raw data directory + + input_dir: the raw data directory + output_dir: directory to store the cropped images in + wells: glob pattern for well matching + exposures: glob pattern for exposure id matching + scale: scale factor for the output images + crop_single_spots: create cropped images for each spot + """ spot_parameters = retrieve_spot_parameters(input_dir, scale) file_map = create_file_map(input_dir, wells, exposures) output_path = 
Path(output_dir) @@ -133,7 +220,7 @@ def process( for image_path in image_files: img = process_image(image_path, spot_parameters, array_data, scale) img.save(output_path / image_path.name) - if add_single_spots: + if crop_single_spots: create_crops( output_path, img, @@ -184,18 +271,26 @@ def process( help="scale-up of images", ) @click.option( - "--spots", + "--crop", default=False, is_flag=True, show_default=True, - help="include cropped images of spots", + help="create cropped images of each spots", ) -def run(source, output=None, wells="*", exposures="*", scale=3, spots=False): +def run(source, output=None, wells="*", exposures="*", scale=3, crop=False): + """command line interface to process a complete raw data directory + + source: the raw data directory + output: directory to store the cropped images in + wells: glob pattern for well matching + exposures: glob pattern for exposure id matching + scale: scale factor for the output images + crop: create cropped images for each spot + """ source = Path(source) if output is None or not Path(output).is_dir(): - parent = source.absolute().parent now = datetime.now().strftime("%Y-%m-%d %H-%M-%S") - output = parent / now + output = source.absolute().parent / now output.mkdir(exist_ok=True) - process(source, output, wells, exposures, scale, spots) + process(source, output, wells, exposures, scale, crop) diff --git a/sensospot_grid/images.py b/sensospot_grid/images.py index aa47126..d47fb40 100644 --- a/sensospot_grid/images.py +++ b/sensospot_grid/images.py @@ -17,50 +17,115 @@ RED = (255, 0, 0) def recalculate(iterable, factor): + """dirty hack to apply a factor to items in a tuple + + iterable: the tuple to apply the factor + factor: the factor to apply + + returns: tuple or namedtuple with factor applied to items + """ recalculated = [v * factor for v in iterable] - cls = type(iterable) try: + # for named tuples + cls = type(iterable) return cls(*recalculated) except TypeError: + # for tuples return 
tuple(recalculated) def convert_16bit_to_8bit(img): + """convert a 16 bit grey scale image to 8 bit without intensity crop + + img: image, 16 bit greyscale + + returns: image, 8 bit greyscale + """ array = numpy.uint8(numpy.array(img) / 256) return Image.fromarray(array) def convert_16bit_grey_to_color(img): + """convert a 16 bit grey scale image to 8 bit rgb without intensity crop + + img: image, 16 bit greyscale + + returns: image, 8 bit rgb + """ return convert_16bit_to_8bit(img).convert(mode="RGB") def resize(img, scale=1): + """resizes an image by a scale factor + + img: image to resize + scale: scale factor + + returns: rescaled image + """ return img.resize(recalculate(img.size, scale)) def load_array_image(file_path, scale=1): + """loads an array image file + + The image is converted to rgb and scaled. + + file_path: path to image file + scale: factor for scaling the image + + returns: image + """ img = Image.open(file_path) colored = convert_16bit_grey_to_color(img) return resize(colored, scale) -def get_position(data_series, actual=True): +def get_position(spot_data, actual=True): + """returns the position of a spot from spot data + + spot_data: numerical spot data + actual: use the detected position if True, nominal position if False + + returns: a Point with x- and y-positions in pixels + """ prefix = "Pos" if actual else "Pos.Nom" - x = int(data_series[f"{prefix}.X"]) - y = int(data_series[f"{prefix}.Y"]) + x = int(spot_data[f"{prefix}.X"]) + y = int(spot_data[f"{prefix}.Y"]) return Point(x, y) def get_box(center, width, height=None): + """returns the upper left and lower right corner of a bounding box + + center: position of the center of the box as "Point" namedtuple + width: width of the bounding box + height: height of the bounding box, if None, the width is used + + returns: tuple (upper left x, upper left y, lower right x, lower right y) + """ height = width if height is None else height dx = width / 2 dy = height / 2 + # First, I tried to return 
(Point(x1, y1), Point(x2, y2)) but this format + # is not accepted by all pillow methods that are used. + # The form (x1, y1, x2, y2), on the other hand, is accepted. return (center.x - dx, center.y - dy, center.x + dx, center.y + dy) def annotate_spot(canvas, spot_parameters, spot_data, scale=1): + """annotate one spot position + + draws the circles and squares around a spot and adds the spot id + + canvas: an ImageDraw.Draw instance to use + spot_parameters: settings of spot analysis + spot_data: numerical spot data + scale: image scale factor + """ + found = spot_data["Spot.Found"] center = recalculate(get_position(spot_data, found), scale) color = GREEN if found else RED @@ -78,12 +143,32 @@ def annotate_spot(canvas, spot_parameters, spot_data, scale=1): def annotate_image(array_img, spot_parameters, array_data, scale): + """annotates all spot positions in an image + + draws the circles and squares around all spots and adds the spot ids + + array_img: the image to annotate + spot_parameters: settings of spot analysis + array_data: numerical data of all spots + scale: image scale factor + """ canvas = ImageDraw.Draw(array_img) for index, spot_data in array_data.iterrows(): annotate_spot(canvas, spot_parameters, spot_data, scale) def crop(array_img, spot_parameters, spot_data, scale=1): + """creates a cropped image of one spot + + the crop box is centered on the spot position and sized by crop_x and crop_y + + array_img: the image to crop + spot_parameters: settings of spot analysis + spot_data: numerical spot data + scale: image scale factor + + returns: cropped image + """ found = spot_data["Spot.Found"] center = recalculate(get_position(spot_data, found), scale) box = get_box(center, spot_parameters.crop_x, spot_parameters.crop_y) diff --git a/sensospot_grid/parameters.py b/sensospot_grid/parameters.py index b83ff42..a1cdfc0 100644 --- a/sensospot_grid/parameters.py +++ b/sensospot_grid/parameters.py @@ -14,11 +14,25 @@ SpotParameters = namedtuple( def _to_micro_meters(value): + 
"""helper method to convert a string with a value in mm to micrometers + + example: _to_micro_meters("0.350") == 350 + + value: the value to convert + + returns: the value converted to micrometers + """ as_float = float(value) return int(as_float * 1000) def array_parameters_from_xml(tree): + """get the array parameter setting from an xml tree + + tree: xml tree + + returns: namedtuple ArrayParameters + """ layout = tree.find("Layout") sx = int(layout.attrib["NofSpotsX"]) sy = int(layout.attrib["NofSpotsY"]) @@ -28,27 +42,68 @@ def array_parameters_from_xml(tree): def spot_parameters_from_xml(tree, array_parameters): + """get the spot parameter setting from an xml tree + + Some spot parameters are set as array parameters. + + tree: xml tree + array_parameters: namedtuple ArrayParameters + + returns: namedtuple SpotParameters + """ array = tree.find("MicroArrayAnalysis").find("Settings") - reg = tree.find("Registration").find("Settings") rs = _to_micro_meters(array.attrib["MinSpotSizeMm"]) rb = _to_micro_meters(array.attrib["MaxSpotSizeMm"]) + + # these values are actually array parameters rx = array_parameters.dist_x ry = array_parameters.dist_y + # ROI is strangely named in params file, it's actually the outside crop + reg = tree.find("Registration").find("Settings") cx = _to_micro_meters(reg.attrib["ROIMarginWidth"]) cy = _to_micro_meters(reg.attrib["ROIMarginHeight"]) + return SpotParameters(rs, rb, rx, ry, cx, cy) -def get_array_parameters(params_file): - svary_file = Path(params_file).with_suffix(".svary") - with svary_file.open("r") as file_handle: +def _parse_params_file(params_file, suffix): + """parse a parameters settings file + + For finding the params file in the raw data folder, the + `sensospot_data._search_measurement_params_file()` method is used. 
The + resulting file path is actually for a different file, but the one needed + resides in the same folder. + + params_file: one of the params files in the settings folder of the scan + suffix: file name extension of the right xml file + + returns: ElementTree instance + """ + xml_file = Path(params_file).with_suffix(suffix) + with xml_file.open("r") as file_handle: tree = ElementTree.parse(file_handle) + return tree + + +def get_array_parameters(params_file): + """parse the array parameters file + + params_file: one of the params files in the settings folder of the scan + + returns: namedtuple ArrayParameters + """ + tree = _parse_params_file(params_file, ".svary") return array_parameters_from_xml(tree) def get_spot_parameters(params_file, array_parameters): - svalg_file = Path(params_file).with_suffix(".svalg") - with svalg_file.open("r") as file_handle: - tree = ElementTree.parse(file_handle) + """parse the spot parameters file + + params_file: one of the params files in the settings folder of the scan + array_parameters: namedtuple ArrayParameters + + returns: namedtuple SpotParameters + """ + tree = _parse_params_file(params_file, ".svalg") return spot_parameters_from_xml(tree, array_parameters)