diff --git a/.gitignore b/.gitignore
index 1d77a72..6099c79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# This project
+
+example_output/
+
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/pyproject.toml b/pyproject.toml
index 2393c3a..cf21fdd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,10 @@ classifiers = [
 ]
 
 requires = [
-
+    "sensospot_data",
+    "Pillow >= 8.0",
+    "numpy",
+    "defusedxml",
 ]
 
 requires-python = ">=3.7"
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..70c9c98
--- /dev/null
+++ b/run.py
@@ -0,0 +1,26 @@
+from pathlib import Path
+from PIL import Image, ImageDraw, ImageFont
+
+from sensospot_images import process
+from sensospot_data import parse_file
+from sensospot_data.parameters import _search_measurement_params_file
+
+from defusedxml import ElementTree
+
+import itertools
+
+THIS_FILE_DIR = Path(__file__).absolute().parent
+
+EXAMPLE_DATA_DIR = THIS_FILE_DIR / "example_data"
+EXAMPLE_IMAGE = EXAMPLE_DATA_DIR / "11Aug2020 NQC0329 Dry SG2-127-002_1_A01_1.tif"
+EXAMPLE_DATA = EXAMPLE_DATA_DIR / "11Aug2020 NQC0329 Dry SG2-127-002_1_A01_1.csv"
+
+OUTPUT_DIR = THIS_FILE_DIR / "example_output"
+# example_output/ is git-ignored and therefore missing in a fresh checkout
+OUTPUT_DIR.mkdir(exist_ok=True)
+old_output_files = [i for i in OUTPUT_DIR.iterdir() if i.is_file()]
+for path in old_output_files:
+    path.unlink()
+
+
+process(EXAMPLE_DATA_DIR, OUTPUT_DIR)
diff --git a/sensospot_images/__init__.py b/sensospot_images/__init__.py
index 8fa57ea..6e8ba04 100644
--- a/sensospot_images/__init__.py
+++ b/sensospot_images/__init__.py
@@ -4,3 +4,137 @@ Creating nice spot images from scans
 """
 
 __version__ = "0.0.1"
+
+from pathlib import Path
+import sys
+
+from sensospot_data import parse_file
+from sensospot_data.parameters import _search_measurement_params_file
+
+from .images import recalculate, get_position, annotate_image, load_array_image, crop
+from .parameters import get_spot_parameters, get_array_parameters
+
+
+def calulate_pixel_size(data_frame, array_definition):
+    """Return the size of one pixel in micrometers.
+
+    The nominal positions of the first and last spot in the data frame give
+    the array extent in pixels, the array definition gives it in micrometers.
+    """
+    first = get_position(data_frame.iloc[0], actual=False)
+    last = get_position(data_frame.iloc[-1], actual=False)
+
+    x_dist_pixel = last.x - first.x
+    y_dist_pixel = last.y - first.y
+
+    ad = array_definition
+    x_dist_um = ad.dist_x * (ad.size_x - 1)
+    y_dist_um = ad.dist_y * (ad.size_y - 1)
+
+    if x_dist_um == 0:
+        # only one spot in x direction: only the y axis carries a distance
+        return y_dist_um / y_dist_pixel
+    elif y_dist_um == 0:
+        # only one spot in y direction: only the x axis carries a distance
+        return x_dist_um / x_dist_pixel
+
+    # more than one spot in each direction
+    x_pixel_size = x_dist_um / x_dist_pixel
+    y_pixel_size = y_dist_um / y_dist_pixel
+
+    return (x_pixel_size + y_pixel_size) / 2
+
+
+def get_example_data_path(input_dir):
+    """Return the csv path belonging to the first tif image in input_dir."""
+    input_path = Path(input_dir)
+    example_tif = next(input_path.glob("*.tif"), None)
+    if example_tif is None:
+        sys.exit(f"Could not find any tif images in {input_dir}")
+    return example_tif.with_suffix(".csv")
+
+
+def get_filename_prefix(input_dir):
+    """Return the file name part in front of the well and exposure ids."""
+    file_path = get_example_data_path(input_dir)
+    example_name = file_path.stem
+    prefix, well, exposure = example_name.rsplit("_", 2)
+    return prefix
+
+
+def retrieve_spot_parameters(input_dir, scale):
+    """Return the spot parameters converted from micrometers to scaled pixels."""
+    parameters_path = _search_measurement_params_file(input_dir)
+    if parameters_path is None:
+        sys.exit(f"Could not find parameter files in {input_dir}")
+
+    array_parameters = get_array_parameters(parameters_path)
+    spot_parameters = get_spot_parameters(parameters_path, array_parameters)
+
+    example_data_path = get_example_data_path(input_dir)
+    example_data = parse_file(example_data_path)
+
+    pixel_size = calulate_pixel_size(example_data, array_parameters)
+
+    return recalculate(spot_parameters, scale / pixel_size)
+
+
+def search_image_files(input_dir, wells, exposures):
+    """Return the tif files matching the well and exposure patterns."""
+    input_path = Path(input_dir)
+    prefix = get_filename_prefix(input_path)
+    tmp_pattern = f"{prefix}_*{wells}*_*{exposures}.tif"
+    # collapse repeated wildcards produced by the default "*" arguments
+    pattern = tmp_pattern.replace("***", "*").replace("**", "*")
+    return input_path.glob(pattern)
+
+
+def create_file_map(input_dir, wells, exposures):
+    """Return a dict mapping each data csv file to a list of its tif images.
+
+    Images without an existing csv file are left out.
+    """
+    file_map = {}
+    for tif_path in search_image_files(input_dir, wells, exposures):
+        rest, exposure = tif_path.stem.rsplit("_", 1)
+        csv_path = tif_path.parent / f"{rest}_1.csv"
+        if csv_path.is_file():
+            file_map.setdefault(csv_path, []).append(tif_path)
+    return file_map
+
+
+def process_image(image_file, spot_parameters, spot_data, scale):
+    """Load one array image, scale it and draw the spot annotations on it."""
+    img = load_array_image(image_file, scale=scale)
+    annotate_image(img, spot_parameters, spot_data, scale)
+    return img
+
+
+def create_crops(output_path, img, image_path, spot_parameters, array_data, scale):
+    """Save a cropped tif image for every spot of the array to output_path."""
+    base_name = image_path.stem
+    for index, spot_data in array_data.iterrows():
+        cropped_img = crop(img, spot_parameters, spot_data, scale)
+        new_path = output_path / f"{base_name}_{index + 1:03}.tif"
+        cropped_img.save(new_path)
+
+
+def process(input_dir, output_dir, scale=3, wells="*", exposures="*", add_single_spots=False):
+    """Annotate all matching images of input_dir and save them to output_dir.
+
+    With add_single_spots set, a crop of every single spot is saved as well.
+    The output directory must already exist.
+    """
+    spot_parameters = retrieve_spot_parameters(input_dir, scale)
+    file_map = create_file_map(input_dir, wells, exposures)
+    output_path = Path(output_dir)
+    if not output_path.is_dir():
+        sys.exit(f"Could not find output directory: {output_dir}")
+    for data_file, image_files in file_map.items():
+        array_data = parse_file(data_file)
+        print(data_file)
+        for image_path in image_files:
+            img = process_image(image_path, spot_parameters, array_data, scale)
+            img.save(output_path / image_path.name)
+            if add_single_spots:
+                create_crops(output_path, img, image_path, spot_parameters, array_data, scale)
diff --git a/sensospot_images/arial.ttf b/sensospot_images/arial.ttf
new file mode 100644
index 0000000..8682d94
Binary files /dev/null and b/sensospot_images/arial.ttf differ
diff --git a/sensospot_images/images.py b/sensospot_images/images.py
new file mode 100644
index 0000000..aa47126
--- /dev/null
+++ b/sensospot_images/images.py
@@ -0,0 +1,115 @@
+from pathlib import Path
+from collections import namedtuple
+
+import numpy
+from PIL import Image, ImageDraw, ImageFont
+
+Point = namedtuple("Point", ["x", "y"])
+Spot = namedtuple("Spot", ["found", "x", "y"])
+
+
+THIS_DIR = Path(__file__).absolute().parent
+FONT_PATH = THIS_DIR / "arial.ttf"
+FONT = ImageFont.truetype(str(FONT_PATH), 32)
+
+GREEN = (0, 255, 0)
+RED = (255, 0, 0)
+
+
+def recalculate(iterable, factor):
+    """Multiply every value of iterable with factor.
+
+    The result has the type of the input if that type can be built from
+    positional arguments (e.g. a namedtuple), otherwise it is a plain tuple.
+    """
+    recalculated = [v * factor for v in iterable]
+    cls = type(iterable)
+    try:
+        return cls(*recalculated)
+    except TypeError:
+        return tuple(recalculated)
+
+
+def convert_16bit_to_8bit(img):
+    """Return a new image with all pixel values divided by 256 as uint8."""
+    array = numpy.uint8(numpy.array(img) / 256)
+    return Image.fromarray(array)
+
+
+def convert_16bit_grey_to_color(img):
+    """Return the image reduced to 8 bit and converted to RGB mode."""
+    return convert_16bit_to_8bit(img).convert(mode="RGB")
+
+
+def resize(img, scale=1):
+    """Return the image with width and height multiplied by scale."""
+    return img.resize(recalculate(img.size, scale))
+
+
+def load_array_image(file_path, scale=1):
+    """Open an image file and return it as a scaled RGB image."""
+    img = Image.open(file_path)
+    colored = convert_16bit_grey_to_color(img)
+    return resize(colored, scale)
+
+
+def get_position(data_series, actual=True):
+    """Return the spot position as Point.
+
+    Reads the "Pos.X" / "Pos.Y" values if actual is true, otherwise the
+    "Pos.Nom.X" / "Pos.Nom.Y" values.
+    """
+    prefix = "Pos" if actual else "Pos.Nom"
+    x = int(data_series[f"{prefix}.X"])
+    y = int(data_series[f"{prefix}.Y"])
+    return Point(x, y)
+
+
+def get_box(center, width, height=None):
+    """Return (left, top, right, bottom) of a box centered on center.
+
+    Without a height the box is a square.
+    """
+    height = width if height is None else height
+
+    dx = width / 2
+    dy = height / 2
+
+    return (center.x - dx, center.y - dy, center.x + dx, center.y + dy)
+
+
+def annotate_spot(canvas, spot_parameters, spot_data, scale=1):
+    """Draw the roi box, the background and spot circles and the id of one spot.
+
+    Found spots are drawn in green at their actual position, spots that were
+    not found in red at their nominal position. The id text is always red.
+    """
+    found = spot_data["Spot.Found"]
+    center = recalculate(get_position(spot_data, found), scale)
+    color = GREEN if found else RED
+
+    box = get_box(center, spot_parameters.roi_x, spot_parameters.roi_y)
+    canvas.rectangle(box, outline=color, width=1)
+
+    box = get_box(center, spot_parameters.radius_bkg)
+    canvas.ellipse(box, outline=color, width=2)
+
+    box = get_box(center, spot_parameters.radius_spot)
+    canvas.ellipse(box, outline=color, width=1)
+
+    canvas.text(center, str(spot_data["Pos.Id"]), RED, font=FONT)
+
+
+def annotate_image(array_img, spot_parameters, array_data, scale):
+    """Draw the annotations of all spots in array_data onto array_img."""
+    canvas = ImageDraw.Draw(array_img)
+    for index, spot_data in array_data.iterrows():
+        annotate_spot(canvas, spot_parameters, spot_data, scale)
+
+
+def crop(array_img, spot_parameters, spot_data, scale=1):
+    """Return the part of array_img around one spot, sized crop_x by crop_y."""
+    found = spot_data["Spot.Found"]
+    center = recalculate(get_position(spot_data, found), scale)
+    box = get_box(center, spot_parameters.crop_x, spot_parameters.crop_y)
+    return array_img.crop(box)
diff --git a/sensospot_images/parameters.py b/sensospot_images/parameters.py
new file mode 100644
index 0000000..b83ff42
--- /dev/null
+++ b/sensospot_images/parameters.py
@@ -0,0 +1,64 @@
+from pathlib import Path
+from collections import namedtuple
+
+from defusedxml import ElementTree
+
+ArrayParameters = namedtuple(
+    "ArrayParameters", ["size_x", "size_y", "dist_x", "dist_y"]
+)
+
+SpotParameters = namedtuple(
+    "SpotParameters",
+    ["radius_spot", "radius_bkg", "roi_x", "roi_y", "crop_x", "crop_y"],
+)
+
+
+def _to_micro_meters(value):
+    """Convert a millimeter value (number or numeric string) to whole micrometers."""
+    as_float = float(value)
+    # round before truncating: float("1.001") * 1000 is 1000.9999999999999
+    return int(round(as_float * 1000))
+
+
+def array_parameters_from_xml(tree):
+    """Read the number of spots and the spot distances from the Layout element."""
+    layout = tree.find("Layout")
+    sx = int(layout.attrib["NofSpotsX"])
+    sy = int(layout.attrib["NofSpotsY"])
+    dx = _to_micro_meters(layout.attrib["SpotDistMmX"])
+    dy = _to_micro_meters(layout.attrib["SpotDistMmY"])
+    return ArrayParameters(sx, sy, dx, dy)
+
+
+def spot_parameters_from_xml(tree, array_parameters):
+    """Read the spot sizes and crop margins from the xml tree.
+
+    The roi size is not read from the xml tree, it is the spot distance of
+    the array parameters.
+    """
+    array = tree.find("MicroArrayAnalysis").find("Settings")
+    reg = tree.find("Registration").find("Settings")
+    rs = _to_micro_meters(array.attrib["MinSpotSizeMm"])
+    rb = _to_micro_meters(array.attrib["MaxSpotSizeMm"])
+    rx = array_parameters.dist_x
+    ry = array_parameters.dist_y
+    # ROI is strangely named in params file, it's actually the outside crop
+    cx = _to_micro_meters(reg.attrib["ROIMarginWidth"])
+    cy = _to_micro_meters(reg.attrib["ROIMarginHeight"])
+    return SpotParameters(rs, rb, rx, ry, cx, cy)
+
+
+def get_array_parameters(params_file):
+    """Parse the .svary file sharing the name of params_file into ArrayParameters."""
+    svary_file = Path(params_file).with_suffix(".svary")
+    with svary_file.open("r") as file_handle:
+        tree = ElementTree.parse(file_handle)
+    return array_parameters_from_xml(tree)
+
+
+def get_spot_parameters(params_file, array_parameters):
+    """Parse the .svalg file sharing the name of params_file into SpotParameters."""
+    svalg_file = Path(params_file).with_suffix(".svalg")
+    with svalg_file.open("r") as file_handle:
+        tree = ElementTree.parse(file_handle)
+    return spot_parameters_from_xml(tree, array_parameters)