From 6eb7c350f06e0ed09b6014365eb1f6de61f8714c Mon Sep 17 00:00:00 2001
From: Holger Frey <frey@imtek.de>
Date: Thu, 29 Apr 2021 12:36:43 +0200
Subject: [PATCH] added lot of code documentation

---
 CHANGES.md                   |   6 ++
 README.md                    |   8 +--
 sensospot_grid/__init__.py   | 123 +++++++++++++++++++++++++++++++----
 sensospot_grid/images.py     |  93 ++++++++++++++++++++++++--
 sensospot_grid/parameters.py |  69 ++++++++++++++++++--
 5 files changed, 270 insertions(+), 29 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 2fd3f54..1c72ece 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,9 @@
+0.1.1  - it works!
+------------------
+
+ - first working cli version
+
+
 0.0.1  - first version
 ----------------------
 
diff --git a/README.md b/README.md
index e7ed525..6e7659a 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,9 @@ Creating nice spot images from scans
 
 ```python
 
-    import sensospot_images
+    import sensospot_grid
 
-    sensospot_images.run()
+    sensospot_grid.run()
 ```
 
 
@@ -17,10 +17,10 @@ Creating nice spot images from scans
 
 To install the development version of Sensospot Images:
 
-    git clone https://github.com/holgi/sensospot_images.git
+    git clone https://github.com/holgi/sensospot_grid.git
 
     # create a virtual environment and install all required dev dependencies
-    cd sensospot_images
+    cd sensospot_grid
     make devenv
 
 To run the tests, use `make tests` or `make coverage` for a complete report.
diff --git a/sensospot_grid/__init__.py b/sensospot_grid/__init__.py
index da2a7e4..d4732ba 100644
--- a/sensospot_grid/__init__.py
+++ b/sensospot_grid/__init__.py
@@ -1,9 +1,9 @@
 """ Sensospot Images
 
-Creating nice spot images from scans
+Creating nice grid images from scans
 """
 
-__version__ = "0.0.1"
+__version__ = "0.1.0"
 
 import sys
 from pathlib import Path
@@ -24,12 +24,21 @@ from .parameters import get_spot_parameters, get_array_parameters
 
 
 def calulate_pixel_size(data_frame, array_definition):
+    """calculate the pixel size in micrometers
+
+    data_frame: the numerical analysis from sensospot of one image
+    array_definition: named tuple with sensospot settings
+
+    returns: pixel size in micrometers
+    """
+    # get the first and last probe position in the data
     first = get_position(data_frame.iloc[0], actual=False)
     last = get_position(data_frame.iloc[-1], actual=False)
 
     x_dist_pixel = last.x - first.x
     y_dist_pixel = last.y - first.y
 
+    # calculate the distance between the first and last spot on x and y axis
     ad = array_definition
     x_dist_um = ad.dist_x * (ad.size_x - 1)
     y_dist_um = ad.dist_y * (ad.size_y - 1)
@@ -49,6 +58,16 @@ def calulate_pixel_size(data_frame, array_definition):
 
 
 def get_example_data_path(input_dir):
+    """returns the path to one data file in the scan folder
+
+    this will search for an array image file first and use it as a template
+    filename for a csv file. Some raw data folders contain csv files that
+    are unrelated to image files and should be avoided.
+
+    input_dir: the raw data directory to search in
+
+    returns: path to one csv file
+    """
     input_path = Path(input_dir)
     tif_files = input_path.glob("*.tif")
     example_tif = next(tif_files)
@@ -56,6 +75,16 @@ def get_example_data_path(input_dir):
 
 
 def get_filename_prefix(input_dir):
+    """returns the common file name prefix for tif and csv files
+
+    The raw data in the scanfolder has the format:
+        `[prefix]_[well]_[exposure].[suffix]`
+    To be able to find all well and exposure files, the prefix is needed.
+
+    input_dir: the raw data directory to search in
+
+    returns: the prefix for array analysis files
+    """
     file_path = get_example_data_path(input_dir)
     example_name = file_path.stem
     prefix, well, exposure = example_name.rsplit("_", 2)
@@ -63,6 +92,17 @@ def get_filename_prefix(input_dir):
 
 
 def retrieve_spot_parameters(input_dir, scale):
+    """returns the spot parameter settings of the analysis
+
+    All setting parameters are stored in a special folder in the raw data
+    directory. This parses the files needed and recalculates the values
+    from micrometers to pixels.
+
+    input_dir: the raw data directory to search in
+    scale: scale factor for the image
+
+    returns: a named tuple with spot parameters
+    """
     parameters_path = _search_measurement_params_file(input_dir)
     if parameters_path is None:
         sys.exit(f"Could not find parameter files in {input_dir}")
@@ -79,6 +119,14 @@ def retrieve_spot_parameters(input_dir, scale):
 
 
 def search_image_files(input_dir, wells, exposures):
+    """searches for tif files in the raw data directory
+
+    input_dir: the raw data directory to search in
+    wells: glob pattern for well matching
+    exposures: glob pattern for exposure id matching
+
+    returns: iterator of tif file paths
+    """
     input_path = Path(input_dir)
     prefix = get_filename_prefix(input_path)
     tmp_pattern = f"{prefix}_*{wells}*_*{exposures}.tif"
@@ -87,10 +135,23 @@ def search_image_files(input_dir, wells, exposures):
 
 
 def create_file_map(input_dir, wells, exposures):
+    """creates a dictionary relating array data files to image files
+
+    Only the first exposure of an exposure series is used for spot location,
+    therefore only the data of the exposure id 1 should be used for any
+    exposure of the well
+
+    input_dir: the raw data directory to search in
+    wells: glob pattern for well matching
+    exposures: glob pattern for exposure id matching
+
+    returns: dict, data file paths as keys, list of image file paths as values
+    """
+
     file_map = {}
     for tif_path in search_image_files(input_dir, wells, exposures):
-        rest, exposure = tif_path.stem.rsplit("_", 1)
-        csv_path = tif_path.parent / f"{rest}_1.csv"
+        well, rest = tif_path.stem.rsplit("_", 1)
+        csv_path = tif_path.parent / f"{well}_1.csv"
         if csv_path.is_file():
             if csv_path not in file_map:
                 file_map[csv_path] = []
@@ -98,15 +159,32 @@ def create_file_map(input_dir, wells, exposures):
     return file_map
 
 
-def process_image(image_file, spot_parameters, spot_data, scale):
+def process_image(image_file, spot_parameters, array_data, scale):
+    """load and annotate image
+
+    image_file: file path to array image
+    spot_parameters: settings of spot analysis
+    array_data: numerical data of analysed image
+    scale: scale factor for the image
+
+    returns: annotated image
+    """
     img = load_array_image(image_file, scale=scale)
-    annotate_image(img, spot_parameters, spot_data, scale)
+    annotate_image(img, spot_parameters, array_data, scale)
     return img
 
 
 def create_crops(
     output_path, img, image_path, spot_parameters, array_data, scale
 ):
+    """create cropped images for each spot
+
+    output_path: directory to store the cropped images in
+    image_path: file path to array image
+    spot_parameters: settings of spot analysis
+    array_data: numerical data of analysed image
+    scale: scale factor for the image
+    """
     base_name = image_path.stem
     for index, spot_data in array_data.iterrows():
         cropped_img = crop(img, spot_parameters, spot_data, scale)
@@ -120,8 +198,17 @@ def process(
     wells="*",
     exposures="*",
     scale=3,
-    add_single_spots=False,
+    crop_single_spots=False,
 ):
+    """process a complete raw data directory
+
+    input_dir: the raw data directory
+    output_dir: directory to store the cropped images in
+    wells: glob pattern for well matching
+    exposures: glob pattern for exposure id matching
+    scale: scale factor for the output images
+    crop_single_spots: create cropped images for each spot
+    """
     spot_parameters = retrieve_spot_parameters(input_dir, scale)
     file_map = create_file_map(input_dir, wells, exposures)
     output_path = Path(output_dir)
@@ -133,7 +220,7 @@ def process(
         for image_path in image_files:
             img = process_image(image_path, spot_parameters, array_data, scale)
             img.save(output_path / image_path.name)
-            if add_single_spots:
+            if crop_single_spots:
                 create_crops(
                     output_path,
                     img,
@@ -184,18 +271,26 @@ def process(
     help="scale-up of images",
 )
 @click.option(
-    "--spots",
+    "--crop",
     default=False,
     is_flag=True,
     show_default=True,
-    help="include cropped images of spots",
+    help="create cropped images of each spots",
 )
-def run(source, output=None, wells="*", exposures="*", scale=3, spots=False):
+def run(source, output=None, wells="*", exposures="*", scale=3, crop=False):
+    """command line interface to process a complete raw data directory
+
+    source: the raw data directory
+    output: directory to store the cropped images in
+    wells: glob pattern for well matching
+    exposures: glob pattern for exposure id matching
+    scale: scale factor for the output images
+    crop: create cropped images for each spot
+    """
     source = Path(source)
     if output is None or not Path(output).is_dir():
-        parent = source.absolute().parent
         now = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
-        output = parent / now
+        output = source.absolute().parent / now
         output.mkdir(exist_ok=True)
 
-    process(source, output, wells, exposures, scale, spots)
+    process(source, output, wells, exposures, scale, crop)
diff --git a/sensospot_grid/images.py b/sensospot_grid/images.py
index aa47126..d47fb40 100644
--- a/sensospot_grid/images.py
+++ b/sensospot_grid/images.py
@@ -17,50 +17,115 @@ RED = (255, 0, 0)
 
 
 def recalculate(iterable, factor):
+    """dirty hack to apply a factor to items in a tuple
+
+    iterable: the tuple to apply the factor
+    factor: the factor to apply
+
+    returns: tuple or namedtuple with factor applied to items
+    """
     recalculated = [v * factor for v in iterable]
-    cls = type(iterable)
     try:
+        # for named tuples
+        cls = type(iterable)
         return cls(*recalculated)
     except TypeError:
+        # for tuples
         return tuple(recalculated)
 
 
 def convert_16bit_to_8bit(img):
+    """convert a 16 bit grey scale image to 8 bit without intensity crop
+
+    img: image, 16 bit greyscale
+
+    returns: image, 8 bit greyscale
+    """
     array = numpy.uint8(numpy.array(img) / 256)
     return Image.fromarray(array)
 
 
 def convert_16bit_grey_to_color(img):
+    """convert a 16 bit grey scale image to 8 bit rgb without intensity crop
+
+    img: image, 16 bit greyscale
+
+    returns: image, 8 bit rgb
+    """
     return convert_16bit_to_8bit(img).convert(mode="RGB")
 
 
 def resize(img, scale=1):
+    """resizes an image by a scale factor
+
+    img: image to resize
+    scale: scale factor
+
+    returns: rescaled image
+    """
     return img.resize(recalculate(img.size, scale))
 
 
 def load_array_image(file_path, scale=1):
+    """loads an array image file
+
+    The image is converted to rgb and scaled.
+
+    file_path: path to image file
+    scale: factor for scaling the image
+
+    returns: image
+    """
     img = Image.open(file_path)
     colored = convert_16bit_grey_to_color(img)
     return resize(colored, scale)
 
 
-def get_position(data_series, actual=True):
+def get_position(spot_data, actual=True):
+    """returns the position of a spot from spot data
+
+    spot_data: numerical spot data
+    actual: use the detected position if True, nominal position if False
+
+    returns: a Point with x- and y-positions in pixels
+    """
     prefix = "Pos" if actual else "Pos.Nom"
-    x = int(data_series[f"{prefix}.X"])
-    y = int(data_series[f"{prefix}.Y"])
+    x = int(spot_data[f"{prefix}.X"])
+    y = int(spot_data[f"{prefix}.Y"])
     return Point(x, y)
 
 
 def get_box(center, width, height=None):
+    """returns the upper left and lower right corner of a bounding box
+
+    center: position of the center of the box as "Point" namedtuple
+    width: width of the bounding box
+    height: height of the bounding box, if None, the width is used
+
+    returns: tuple (upper left x, upper left y, lower right x, lower right y)
+    """
     height = width if height is None else height
 
     dx = width / 2
     dy = height / 2
 
+    # First, I tried to return (Point(x1, y1), Point(x2, y2)) but this format
+    # is not accepted by all pillow methods that are used.
+    # The form (x1, y1, x2, y2), on the other hand, is accepted.
     return (center.x - dx, center.y - dy, center.x + dx, center.y + dy)
 
 
 def annotate_spot(canvas, spot_parameters, spot_data, scale=1):
+    """annotate one spot position
+
+    draws the circles and squares around a spot and adds the spot id
+
+    canvas: an ImageDraw.Draw instance to use
+    spot_parameters: settings of spot analysis
+    spot_data: numerical spot data
+    scale: image scale factor
+    """
+
     found = spot_data["Spot.Found"]
     center = recalculate(get_position(spot_data, found), scale)
     color = GREEN if found else RED
@@ -78,12 +143,32 @@ def annotate_spot(canvas, spot_parameters, spot_data, scale=1):
 
 
 def annotate_image(array_img, spot_parameters, array_data, scale):
+    """annotates all spot positions in an image
+
+    draws the circles and squares around all spots and adds the spot ids
+
+    array_img: the image to annotate
+    spot_parameters: settings of spot analysis
+    array_data: numerical data of all spots
+    scale: image scale factor
+    """
     canvas = ImageDraw.Draw(array_img)
     for index, spot_data in array_data.iterrows():
         annotate_spot(canvas, spot_parameters, spot_data, scale)
 
 
 def crop(array_img, spot_parameters, spot_data, scale=1):
+    """creates a cropped image of one spot
+
+    the crop box is centered on the scaled position of the spot
+
+    array_img: the image to crop
+    spot_parameters: settings of spot analysis
+    spot_data: numerical spot data
+    scale: image scale factor
+
+    returns: cropped image
+    """
     found = spot_data["Spot.Found"]
     center = recalculate(get_position(spot_data, found), scale)
     box = get_box(center, spot_parameters.crop_x, spot_parameters.crop_y)
diff --git a/sensospot_grid/parameters.py b/sensospot_grid/parameters.py
index b83ff42..a1cdfc0 100644
--- a/sensospot_grid/parameters.py
+++ b/sensospot_grid/parameters.py
@@ -14,11 +14,25 @@ SpotParameters = namedtuple(
 
 
 def _to_micro_meters(value):
+    """helper method to convert a string with a value in mm to micrometers
+
+    example: _to_micro_meters("0.350") == 350
+
+    value: the value to convert
+
+    returns: the value converted to micrometers
+    """
     as_float = float(value)
     return int(as_float * 1000)
 
 
 def array_parameters_from_xml(tree):
+    """get the array parameter setting from an xml tree
+
+    tree: xml tree
+
+    returns: namedtuple ArrayParameters
+    """
     layout = tree.find("Layout")
     sx = int(layout.attrib["NofSpotsX"])
     sy = int(layout.attrib["NofSpotsY"])
@@ -28,27 +42,68 @@ def array_parameters_from_xml(tree):
 
 
 def spot_parameters_from_xml(tree, array_parameters):
+    """get the spot parameter setting from an xml tree
+
+    Some spot parameters are set as array parameters.
+
+    tree: xml tree
+    array_parameters: namedtuple ArrayParameters
+
+    returns: namedtuple SpotParameters
+    """
     array = tree.find("MicroArrayAnalysis").find("Settings")
-    reg = tree.find("Registration").find("Settings")
     rs = _to_micro_meters(array.attrib["MinSpotSizeMm"])
     rb = _to_micro_meters(array.attrib["MaxSpotSizeMm"])
+
+    # these values are actually array parameters
     rx = array_parameters.dist_x
     ry = array_parameters.dist_y
+
     # ROI is strangely named in params file, it's actually the outside crop
+    reg = tree.find("Registration").find("Settings")
     cx = _to_micro_meters(reg.attrib["ROIMarginWidth"])
     cy = _to_micro_meters(reg.attrib["ROIMarginHeight"])
+
     return SpotParameters(rs, rb, rx, ry, cx, cy)
 
 
-def get_array_parameters(params_file):
-    svary_file = Path(params_file).with_suffix(".svary")
-    with svary_file.open("r") as file_handle:
+def _parse_params_file(params_file, suffix):
+    """parse a parameters settings file
+
+    For finding the params file in the raw data folder, the
+    `sensospot_data._search_measurement_params_file()` method is used. The
+    resulting file path is actually for a different file, but the one needed
+    resides in the same folder
+
+    params_file: one of the params files in the settings folder of the scan
+    suffix: file name extension of the right xml file
+
+    returns: ElementTree instance
+    """
+    xml_file = Path(params_file).with_suffix(suffix)
+    with xml_file.open("r") as file_handle:
         tree = ElementTree.parse(file_handle)
+    return tree
+
+
+def get_array_parameters(params_file):
+    """parse the array parameters file
+
+    params_file: one of the params files in the settings folder of the scan
+
+    returns: namedtuple ArrayParameters
+    """
+    tree = _parse_params_file(params_file, ".svary")
     return array_parameters_from_xml(tree)
 
 
 def get_spot_parameters(params_file, array_parameters):
-    svalg_file = Path(params_file).with_suffix(".svalg")
-    with svalg_file.open("r") as file_handle:
-        tree = ElementTree.parse(file_handle)
+    """parse the spot parameters file
+
+    params_file: one of the params files in the settings folder of the scan
+    array_parameters: namedtuple ArrayParameters
+
+    returns: namedtuple SpotParameters
+    """
+    tree = _parse_params_file(params_file, ".svalg")
     return spot_parameters_from_xml(tree, array_parameters)