# contactangle/contactangle.py

''' collecting data from measured contact angles

While measuring contact angles, for each drop three pictures are recorded
for the static, advancing and the receeding contact angle.

Usually the file name of these pictures consists of four parts:
1. an identifier for the measurement
2. what type of angle was measured, e.g. RCA for receeding contact angle
3. the measured contact angle on the left side of the drop
4. the measured contact angle on the right side of the drop

So a typical file name looks quite cryptic, like: 'PS1 RCA 12,5 15,6'

In addition to cryptic file names with abbreviations, there is normally
quite an amount of files: 5 measurements x 3 types = 15 files for one
substrate or coating alone.

This script provides means to rename these files to a more verbose version
and to collect the data into a text file that can be imported in excel.

Since the naming of the type of measurement and the contact angles themselves
are somehow fixed, the 'user defined action' is renaming the identifier. To
do such a rename, a function accepting the identifier as string and returning
a verbose version as a string needs to be provided

An Example
>>> import contactangle as ca
>>> def verbose_id(identifier):
...     if identifier.startswith('PS'):
...        verbose_name = 'Polystyrol'
...     else:
...        verbose_name = 'Unknown Substrate'
...     number = identifier[-1]
...     return '{}, Measurement {}'.format(verbose_name, number)
...
>>> ca.rename(verbose_id, path='/example/directory')
This will change the cryptic filename 'PS1 RCA 12,5 15,6.bmp' to
'Polystyrol, Measurement 1, receeding, L12,5 R15,6.bmp'

To not tinker with the original files, the 'renaming' is done on a copy of
the original data. The results file is called 'results.xls' and is created
in the same directory where the raw data resides.

This is actually a two step process: first the files are processed and the
future result of the rename is printed to stdout. If no exception occurs,
the renaming of the files is done in a second round.
'''

import math
import os
import shutil
from collections import namedtuple, OrderedDict

# constants
# template for the verbose file name; 'i' is the verbose identifier and
# 'm' a Measurement named tuple
VERBOSE_TEMPLATE = '{i}, {m.type}, L{m.left} R{m.right}'
# pairs of (first letter, verbose measurement type); the order is relevant
# because the list is searched from front to back
TYPE_ABBREVIATIONS = [
    ('s', 'static'),
    ('r', 'receeding'),
    ('a', 'advancing'),
]
# placeholder used where no numeric value is available
NAN_STRING = '-'

# constants for excel output
# head line of the data table
XLS_HEADLINE1 = ('Measurement', 'Position', 'Static', 'Advancing', 'Receeding')
# head line of the aggregation table: same columns, no 'Position' caption
XLS_HEADLINE2 = XLS_HEADLINE1[:1] + ('',) + XLS_HEADLINE1[2:]
# one aggregation row; '{h}' is the human readable label, '{f}' the
# spreadsheet function and '{x}' the last row holding data
XLS_CELLS = ('=A2', '{h}') + tuple(
    '={{f}}({c}2:{c}{{x}})'.format(c=column) for column in 'CDE')
# (label, spreadsheet function) pairs; the function names appear to be the
# German-locale spellings - verify against the target spreadsheet program
XLS_AGGREGATIONS = (
    ('n', 'ANZAHL'),
    ('Mittelwert', 'MITTELWERT'),
    ('Std.Abw.', 'STABW'),
)


# describes a source file: the path as given, its directory part, the
# file name without extension and the extension (including the dot)
FileInfo = namedtuple('FileInfo', 'path dir name ext')

# the data encoded in the name of a source file: abbreviated identifier,
# verbose measurement type, left and right contact angle
Measurement = namedtuple('Measurement', 'id type left right')


def rename(rename_func, path='.'):
    ''' renames contact angle source files in a folder

    The folder is processed twice: the first round is a test run that only
    prints the planned renames, the second round creates the renamed
    copies. An exception in the test run prevents the second round.

    Only '.bmp' and '.avi' files are considered; entries starting with a
    dot are ignored.

    rename_func: function that accepts an abbreviated identifier
                 and returns a verbose version
    path:        path to the folder to process
    '''
    entries = sorted(os.listdir(path))
    visible = [name for name in entries if not name.startswith('.')]
    # os.listdir() only returns bare names; they have to be joined with the
    # folder, otherwise the files are only found if 'path' is the current
    # working directory
    bitmaps = [
        os.path.join(path, name) for name in visible if name.endswith('.bmp')]
    avis = [
        os.path.join(path, name) for name in visible if name.endswith('.avi')]

    # first a test run and if no exception occurs, rename the files
    for do_rename in (False, True):
        # without pictures there are no measurements to collect; skipping
        # the picture round keeps folders that only hold movies working
        if bitmaps:
            process_pictures(rename_func, bitmaps, rename=do_rename)
        process_movies(rename_func, avis, rename=do_rename)


def process_movies(rename_func, avis, rename=False):
    ''' processes every movie in the given list

    The name of a movie file is made up of the abbreviated identifier
    only, so the whole file name (without extension) is handed to the
    rename function.

    rename_func: function that accepts an abbreviated identifier
                 and returns a verbose version
    avis:        list of paths to movie files
    rename:      should the file be renamed or is this a test run
    '''
    for movie_path in avis:
        info = get_file_info(movie_path)
        mock_or_rename(info, rename_func(info.name), rename)


def process_pictures(rename_func, bitmaps, rename=False):
    ''' renames all given pictures and collects the measured data

    Picture file names consist of the abbreviated identifier, the type of
    the measurement and the left and right contact angle.

    The collected values are written to a results file in the directory of
    the last processed picture. This happens on every call, also in a
    test run. If no pictures are given, nothing is written.

    rename_func: function that accepts an abbreviated identifier
                 and returns a verbose version
    bitmaps:     list of paths to bitmap files
    rename:      should the file be renamed or is this a test run
    '''
    # prepare a container for the measurement results
    measurements = OrderedDict()
    # stays None as long as no picture was processed
    source = None
    for bmp in bitmaps:
        # retrieve the measured value and the verbose identification
        source = get_file_info(bmp)
        measurement = extract_measurement(source.name)
        verbose_id = rename_func(measurement.id)
        # record the measured values, one dict per side of the drop
        sides = measurements.setdefault(verbose_id, {'left': {}, 'right': {}})
        sides['left'][measurement.type] = measurement.left
        sides['right'][measurement.type] = measurement.right
        # renaming the file
        verbose_name = VERBOSE_TEMPLATE.format(i=verbose_id, m=measurement)
        mock_or_rename(source, verbose_name, rename)
    if source is None:
        # no pictures: there is neither data nor a known target directory
        return
    write_results(measurements, source.dir)


def extract_measurement(filename):
    ''' extract the abbr. id, type and result of a measurement in a filename

    A measurement is made up from the abbreviated id, type, left and right
    contact angle separated by a space.

    filename: the file name without extension

    Returns a Measurement named tuple. Raises a ValueError naming the
    file if the name does not consist of four space separated parts.
    '''
    # use rsplit, the raw_id might contain a space itself
    parts = filename.rsplit(' ', 3)
    if len(parts) != 4:
        # report the offending file instead of a bare unpacking error
        raise ValueError(
            'Unparsable file name, expected "<id> <type> <left> <right>": '
            + filename)
    raw_id, raw_type, raw_left, raw_right = parts
    measurement_type = get_measurement_type(raw_type)
    left = get_contact_angle(raw_left)
    right = get_contact_angle(raw_right)
    return Measurement(raw_id, measurement_type, left, right)


def get_measurement_type(abbreviation):
    ''' translates an abbreviated measurement type to its verbose form

    The abbreviation is lower cased and searched for the letters listed
    in TYPE_ABBREVIATIONS; the first letter found decides the type. Common
    abbreviations like aca, rca or sca all contain an 'a', which is why
    's' and 'r' are listed ahead of 'a'.

    abbreviation: the abbreviated measurement type

    Raises a ValueError if none of the letters is found.
    '''
    lowered = abbreviation.lower()
    candidates = (
        verbose for letter, verbose in TYPE_ABBREVIATIONS if letter in lowered)
    found = next(candidates, None)
    if found is None:
        raise ValueError('Unknown measurement type: ' + abbreviation)
    return found


def get_contact_angle(raw_angle, decimal_separator=','):
    ''' returns a verified contact angle or a nice NaN representation

    The angle is returned as a string with one decimal place, using the
    given decimal separator. Everything that is not a finite number
    (unparsable text, but also 'nan' or 'inf') yields NAN_STRING.

    raw_angle:         raw notation of the angle value as found in the
                       file name
    decimal_separator: character used as decimal separator.
                       Since I work in Germany, it defaults to a comma
    '''
    # the conversion to and from a float is to verify it's a number
    try:
        angle = float(raw_angle.replace(decimal_separator, '.'))
    except ValueError:
        return NAN_STRING
    # float() also accepts 'nan' and 'inf', which are no usable angles
    if not math.isfinite(angle):
        return NAN_STRING
    angle_as_str = '{:.1f}'.format(angle)
    return angle_as_str.replace('.', decimal_separator)


def get_file_info(path):
    ''' splits a path into its parts and returns them as FileInfo

    The named tuple holds the path as given, the directory part, the file
    name without extension and the extension.

    path: path of the file
    '''
    folder, filename_with_ext = os.path.split(path)
    stem, suffix = os.path.splitext(filename_with_ext)
    return FileInfo(path=path, dir=folder, name=stem, ext=suffix)

def mock_or_rename(source, destination_name, rename=False):
    ''' creates a 'renamed' copy of a file or only announces it

    The copy is placed next to the source file and keeps its extension;
    the source file itself is left untouched. In a test run the source and
    destination paths are printed and nothing is copied.

    source:           named tuple (FileInfo)
    destination_name: name of the new file without extension
    rename:           is this a 'real rename' or just a test
    '''
    target = os.path.join(source.dir, destination_name + source.ext)
    if not rename:
        print('{} -> {}'.format(source.path, target))
        return
    shutil.copyfile(source.path, target)


def write_results(measurements, source_dir):
    ''' stores the measurement results as tab separated text

    The file 'results.xls' gets a head line, one line per identifier and
    side of the drop, two blank lines and finally a small table with one
    spreadsheet formula line per entry in XLS_AGGREGATIONS.

    measurements: OrderedDict with the measurement results
    source_dir:   directory to store the results file
    '''
    target = os.path.join(source_dir, 'results.xls')
    with open(target, 'w') as out:
        out.write(tabbed_line(*XLS_HEADLINE1))
        data_rows = 0
        for verbose_id, sides in measurements.items():
            for side, angles in sides.items():
                cells = [
                    angles.get(kind, NAN_STRING)
                    for kind in ('static', 'advancing', 'receeding')]
                out.write(tabbed_line(verbose_id, side, *cells))
                data_rows += 1
        # the head line occupies row 1, so the data ends one row further down
        last_data_row = data_rows + 1
        # two blank lines and data aggregation fields
        out.write('\n\n')
        out.write(tabbed_line(*XLS_HEADLINE2))
        row_template = tabbed_line(*XLS_CELLS)
        for label, function in XLS_AGGREGATIONS:
            out.write(row_template.format(h=label, f=function, x=last_data_row))

def tabbed_line(*args):
    ''' small helper, joins the given strings with tabs and ends the line '''
    cells = '\t'.join(args)
    return '{}\n'.format(cells)


if __name__ == '__main__':

    def nice_identity(raw_id):
        ''' rename function used when the module is run as a script

        The raw id has to be three characters long; they are read as
        substrate, slide and measurement point.

        raw_id: the abbreviated identifier found in the file name
        '''
        if len(raw_id) != 3:
            raise ValueError('unparsable identity: ' + raw_id)
        substrate, slide, point = raw_id
        if substrate in '125':
            polymer = 'P(DMAA-{}%MABP)'.format(substrate)
        elif substrate in 'ps':
            polymer = 'P(S-5%MABP)'
        else:
            raise ValueError('Unknown raw id: ' + raw_id)
        template = '{}, Slide {}, Measurement {}'
        return template.format(polymer, slide, point)

    # process the current working directory
    rename(nice_identity)