Initial commit

8 years ago · cbce1e1004
2 changed files with 303 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -1,2 +1,49 @@
				@@ -1,2 +1,49 @@
-# contactangle
+collecting data from measured contact angles
+============================================

+While measuring contact angles, three pictures are recorded for each drop:
+one each for the static, the advancing and the receding contact angle.
+
+Usually the file name of these pictures consists of four parts:
+
+1. an identifier for the measurement
+2. what type of angle was measured, e.g. RCA for receding contact angle
+3. the measured contact angle on the left side of the drop
+4. the measured contact angle on the right side of the drop
+
+So a typical file name looks quite cryptic, like: 'PS1 RCA 12,5 15,6'
+
+In addition to cryptic file names with abbreviations, there is normally
+quite an amount of files: 5 measurements x 3 types = 15 files for one
+substrate or coating alone.
+
+This script provides means to rename these files to a more verbose version
+and to collect the data into a text file that can be imported in excel.
+
+Since the naming of the type of measurement and the contact angles themselves
+is more or less fixed, the 'user defined action' is renaming the identifier.
+To do such a rename, a function accepting the identifier as a string and
+returning a verbose version as a string needs to be provided.
+
+An Example
+    >>> import contactangle as ca
+    >>> def verbose_id(identifier):
+    ...     if identifier.startswith('PS'):
+    ...        verbose_name = 'Polystyrol'
+    ...     else:
+    ...        verbose_name = 'Unknown Substrate'
+    ...     number = identifier[-1]
+    ...     return '{}, Measurement {}'.format(verbose_name, number)
+    ...
+    >>> ca.rename(verbose_id, path='/example/directory')
+
+This will change the cryptic filename 'PS1 RCA 12,5 15,6.bmp' to
+'Polystyrol, Measurement 1, receeding, L12,5 R15,6.bmp'
+
+To not tinker with the original files, the 'renaming' is done on a copy of
+the original data. The results file is called 'results.xls' and is created
+in the same directory where the raw data resides.
+
+This is actually a two step process: first the files are processed and the
+future result of the rename is printed to stdout. If no exception occurs,
+the renaming of the files is done in a second round.
--- a/contactangle.py
+++ b/contactangle.py
@ -0,0 +1,255 @@
				@@ -0,0 +1,255 @@
+''' collecting data from measured contact angles
+
+While measuring contact angles, three pictures are recorded for each drop:
+one each for the static, the advancing and the receding contact angle.
+
+Usually the file name of these pictures consists of four parts:
+1. an identifier for the measurement
+2. what type of angle was measured, e.g. RCA for receding contact angle
+3. the measured contact angle on the left side of the drop
+4. the measured contact angle on the right side of the drop
+
+So a typical file name looks quite cryptic, like: 'PS1 RCA 12,5 15,6'
+
+In addition to cryptic file names with abbreviations, there is normally
+quite an amount of files: 5 measurements x 3 types = 15 files for one
+substrate or coating alone.
+
+This script provides means to rename these files to a more verbose version
+and to collect the data into a text file that can be imported in excel.
+
+Since the naming of the type of measurement and the contact angles themselves
+is more or less fixed, the 'user defined action' is renaming the identifier.
+To do such a rename, a function accepting the identifier as a string and
+returning a verbose version as a string needs to be provided.
+
+An Example
+>>> import contactangle as ca
+>>> def verbose_id(identifier):
+...     if identifier.startswith('PS'):
+...        verbose_name = 'Polystyrol'
+...     else:
+...        verbose_name = 'Unknown Substrate'
+...     number = identifier[-1]
+...     return '{}, Measurement {}'.format(verbose_name, number)
+...
+>>> ca.rename(verbose_id, path='/example/directory')
+This will change the cryptic filename 'PS1 RCA 12,5 15,6.bmp' to
+'Polystyrol, Measurement 1, receeding, L12,5 R15,6.bmp'
+
+To not tinker with the original files, the 'renaming' is done on a copy of
+the original data. The results file is called 'results.xls' and is created
+in the same directory where the raw data resides.
+
+This is actually a two step process: first the files are processed and the
+future result of the rename is printed to stdout. If no exception occurs,
+the renaming of the files is done in a second round.
+'''
+
+import os
+import shutil
+from collections import namedtuple, OrderedDict
+
+# naming conventions
+# pattern for the verbose file name: `i` is the verbose identifier,
+# `m` is the Measurement tuple parsed from the original file name
+VERBOSE_TEMPLATE = '{i}, {m.type}, L{m.left} R{m.right}'
+# (key letter, verbose type) pairs, tried in exactly this order:
+# 's' and 'r' have to be checked before 'a'
+TYPE_ABBREVIATIONS = [
+    ('s', 'static'),
+    ('r', 'receeding'),
+    ('a', 'advancing'),
+]
+# placeholder used when an angle is missing or could not be parsed
+NAN_STRING = '-'
+
+# layout of the spreadsheet output
+# headline above the raw data rows
+XLS_HEADLINE1 = ('Measurement', 'Position', 'Static', 'Advancing', 'Receeding')
+# headline above the aggregation rows
+XLS_HEADLINE2 = ('Measurement', '', 'Static', 'Advancing', 'Receeding')
+# cell templates of one aggregation row; {h} is the human readable label,
+# {f} the spreadsheet function and {x} the last row holding raw data
+XLS_CELLS = ('=A2', '{h}', '={f}(C2:C{x})', '={f}(D2:D{x})', '={f}(E2:E{x})')
+# (label, spreadsheet function) pairs, one aggregation row each
+XLS_AGGREGATIONS = (
+    ('n', 'ANZAHL'),
+    ('Mittelwert', 'MITTELWERT'),
+    ('Std.Abw.', 'STABW'),
+)
+
+
+# describes where a source file lives: full path, folder, base name
+# (without extension) and extension
+FileInfo = namedtuple('FileInfo', 'path dir name ext')
+
+# the data encoded in the name of a picture file
+Measurement = namedtuple('Measurement', 'id type left right')
+
+
+def rename(rename_func, path='.'):
+    ''' renames contact angle source files in a folder
+
+    Hidden entries (names starting with a dot) are skipped. Files ending
+    in '.bmp' are treated as pictures, files ending in '.avi' as movies.
+    Everything is processed twice: the first pass only prints the planned
+    renames, so that an exception stops the run before any file is
+    copied; the second pass does the actual work.
+
+    rename_func: function that accepts an abbreviated identifier
+                 and returns a verbose version
+    path:        path to the folder to process
+    '''
+    entries = sorted(os.listdir(path))
+    visible = [name for name in entries if not name.startswith('.')]
+    # os.listdir() only returns bare names; prepend the folder so the
+    # files are found (and the copies created) inside `path` rather than
+    # in the current working directory
+    bitmaps = [
+        os.path.join(path, name) for name in visible if name.endswith('.bmp')]
+    avis = [
+        os.path.join(path, name) for name in visible if name.endswith('.avi')]
+
+    # first a test run and if no exception occurs, rename the files
+    process_pictures(rename_func, bitmaps, rename=False)
+    process_movies(rename_func, avis, rename=False)
+    process_pictures(rename_func, bitmaps, rename=True)
+    process_movies(rename_func, avis, rename=True)
+
+
+def process_movies(rename_func, avis, rename=False):
+    ''' gives every movie in the list its verbose name
+
+    The name of a movie file is nothing but the abbreviated identifier,
+    so the whole base name is handed to rename_func.
+
+    rename_func: function that accepts an abbreviated identifier
+                 and returns a verbose version
+    avis:        list of paths to movie files
+    rename:      True copies the files, False only prints the plan
+    '''
+    for movie_info in map(get_file_info, avis):
+        mock_or_rename(movie_info, rename_func(movie_info.name), rename)
+
+
+def process_pictures(rename_func, bitmaps, rename=False):
+    ''' renames all given pictures and collects the measured data
+
+    Picture file names consist of the abbreviated identifier, the type
+    of the measurement and the left and right contact angle. The
+    collected values are handed to write_results(), which stores them
+    next to the pictures. Nothing is written if no pictures are given.
+
+    rename_func: function that accepts an abbreviated identifier
+                 and returns a verbose version
+    bitmaps:     list of paths to bitmap files
+    rename:      should the file be renamed or is this a test run
+    '''
+    # without pictures there is neither data to report nor a folder to
+    # report it in
+    if not bitmaps:
+        return
+    # prepare a container for the measurement results
+    measurements = OrderedDict()
+    for bmp in bitmaps:
+        # retrieve the measured value and the verbose identification
+        source = get_file_info(bmp)
+        measurement = extract_measurement(source.name)
+        verbose_id = rename_func(measurement.id)
+        # record the measured values, one dict per side of the drop
+        sides = measurements.setdefault(verbose_id, {'left': {}, 'right': {}})
+        sides['left'][measurement.type] = measurement.left
+        sides['right'][measurement.type] = measurement.right
+        # renaming the file
+        verbose_name = VERBOSE_TEMPLATE.format(i=verbose_id, m=measurement)
+        mock_or_rename(source, verbose_name, rename)
+    # the results file goes into the folder of the last processed picture
+    write_results(measurements, source.dir)
+
+
+def extract_measurement(filename):
+    ''' extract the abbr. id, type and result of a measurement in a filename
+
+    A measurement is made up from the abbreviated id, type, left and right
+    contact angle separated by a space.
+
+    filename: the file name without extension
+
+    Returns a Measurement tuple. Raises a ValueError if the file name does
+    not consist of four space separated parts or if the measurement type
+    is unknown.
+    '''
+    # use rsplit, the raw_id might contain a space itself
+    parts = filename.rsplit(' ', 3)
+    if len(parts) != 4:
+        # name the offending file instead of failing with a bare
+        # "not enough values to unpack"
+        raise ValueError(
+            'Expected "<id> <type> <left> <right>", got: ' + filename)
+    raw_id, raw_type, raw_left, raw_right = parts
+    measurement_type = get_measurement_type(raw_type)
+    left = get_contact_angle(raw_left)
+    right = get_contact_angle(raw_right)
+    return Measurement(raw_id, measurement_type, left, right)
+
+
+def get_measurement_type(abbreviation):
+    ''' translates an abbreviated measurement type into its verbose form
+
+    The known types are 'advancing', 'receeding' and 'static', usually
+    abbreviated as aca, rca and sca. The key letters are looked up in the
+    order given by TYPE_ABBREVIATIONS and the first one contained in the
+    abbreviation wins; the check is case insensitive.
+
+    abbreviation: the abbreviated measurement type
+
+    Raises a ValueError if no key letter is found.
+    '''
+    lowered = abbreviation.lower()
+    candidates = [
+        verbose for key, verbose in TYPE_ABBREVIATIONS if key in lowered]
+    if not candidates:
+        raise ValueError('Unknown measurement type: ' + abbreviation)
+    return candidates[0]
+
+
+def get_contact_angle(raw_angle, decimal_separator=','):
+    ''' returns a verified contact angle or a nice NaN representation
+
+    The angle is normalised to one decimal place and written with the
+    given decimal separator. NAN_STRING is returned if the raw value is
+    not a number or not a finite one ('nan', 'inf').
+
+    raw_angle:         raw notation of the angle value as found in the
+                       file name
+    decimal_separator: character used as decimal separator.
+                       Since I work in Germany, it defaults to a comma
+    '''
+    import math
+
+    # the conversion to and from a float is to verify it's a number
+    try:
+        angle = float(raw_angle.replace(decimal_separator, '.'))
+    except ValueError:
+        return NAN_STRING
+    # float() also accepts 'nan' and 'inf', which are no usable angles
+    if not math.isfinite(angle):
+        return NAN_STRING
+    angle_as_str = '{:.1f}'.format(angle)
+    return angle_as_str.replace('.', decimal_separator)
+
+
+def get_file_info(path):
+    ''' splits a path into its parts and returns them as a FileInfo
+
+    The tuple holds the unchanged path, the folder, the file name
+    without extension and the extension (including the leading dot).
+
+    path: path of the file
+    '''
+    folder, name_with_ext = os.path.split(path)
+    stem, suffix = os.path.splitext(name_with_ext)
+    return FileInfo(path=path, dir=folder, name=stem, ext=suffix)
+
+def mock_or_rename(source, destination_name, rename=False):
+    ''' copies a file to its new name or just prints what would be copied
+
+    The copy is placed in the folder of the source file and keeps the
+    extension of the source file. The source file itself stays untouched.
+
+    source:           named tuple (FileInfo)
+    destination_name: name of the new file without extension
+    rename:           True copies the file, False only prints the plan
+    '''
+    new_file = destination_name + source.ext
+    destination_path = os.path.join(source.dir, new_file)
+    if not rename:
+        # test run: only report the planned copy
+        print('{} -> {}'.format(source.path, destination_path))
+        return
+    shutil.copyfile(source.path, destination_path)
+
+
+def write_results(measurements, source_dir):
+    ''' stores the measured values as tab separated text in 'results.xls'
+
+    One row is written per measurement and position, holding the static,
+    advancing and receeding value (NAN_STRING where a value is missing).
+    Two blank lines later a second table follows with one row of
+    spreadsheet formulas per entry in XLS_AGGREGATIONS.
+
+    measurements: OrderedDict with the measurement results
+    source_dir:   directory to store the results file
+    '''
+    target = os.path.join(source_dir, 'results.xls')
+    with open(target, 'w') as out:
+        out.write(tabbed_line(*XLS_HEADLINE1))
+        # the headline is row 1 of the sheet, so the counter always
+        # holds the number of the row written last
+        last_row = 1
+        for idx in measurements:
+            for position, values in measurements[idx].items():
+                cells = [
+                    values.get(kind, NAN_STRING)
+                    for kind in ('static', 'advancing', 'receeding')]
+                out.write(tabbed_line(idx, position, *cells))
+                last_row += 1
+        # two blank lines and data aggregation fields
+        out.write('\n\n')
+        out.write(tabbed_line(*XLS_HEADLINE2))
+        row_template = tabbed_line(*XLS_CELLS)
+        for label, function in XLS_AGGREGATIONS:
+            out.write(row_template.format(h=label, f=function, x=last_row))
+
+def tabbed_line(*args):
+    ''' small helper, joins the arguments with tabs and appends a newline '''
+    joined = '\t'.join(args)
+    return '{}\n'.format(joined)
+
+
+if __name__ == '__main__':
+
+    def nice_identity(raw_id):
+        ''' turns a three character id into a verbose description
+
+        The characters stand for substrate, slide and measurement point.
+        Raises a ValueError for ids of a different length or with an
+        unknown substrate character.
+        '''
+        if len(raw_id) != 3:
+            raise ValueError('unparsable identity: ' + raw_id)
+        substrate, slide, point = raw_id
+        if substrate in '125':
+            # the digit is the MABP percentage of the copolymer
+            polymer = 'P(DMAA-{}%MABP)'.format(substrate)
+        elif substrate in 'ps':
+            polymer = 'P(S-5%MABP)'
+        else:
+            raise ValueError('Unknown raw id: ' + raw_id)
+        return '{}, Slide {}, Measurement {}'.format(polymer, slide, point)
+
+    # process the current working directory
+    rename(nice_identity)