''' collecting data from measured contact angles

While measuring contact angles, for each drop three pictures are recorded:
one each for the static, the advancing and the receding contact angle.
Usually the file name of these pictures consists of four parts:

    1. an identifier for the measurement
    2. what type of angle was measured, e.g. RCA for receding contact angle
    3. the measured contact angle on the left side of the drop
    4. the measured contact angle on the right side of the drop

So a typical file name looks quite cryptic, like: 'PS1 RCA 12,5 15,6'

In addition to cryptic file names with abbreviations, there is normally
quite an amount of files: 5 measurements x 3 types = 15 files for one
substrate or coating alone.

This script provides means to rename these files to a more verbose version
and to collect the data into a text file that can be imported in excel.

Since the naming of the type of measurement and the contact angles
themselves are somehow fixed, the 'user defined action' is renaming the
identifier. To do such a rename, a function accepting the identifier as a
string and returning a verbose version as a string needs to be provided.

An Example

    >>> import conactangle as ca
    >>> def verbose_id(identifier):
    ...     if identifier.startswith('PS'):
    ...         verbose_name = 'Polystyrol'
    ...     else:
    ...         verbose_name = 'Unknown Substrate'
    ...     number = identifier[-1]
    ...     return '{}, Measurement {}'.format(verbose_name, number)
    ...
    >>> ca.rename(verbose_id, path='/example/directory')

This will change the cryptic filename 'PS1 RCA 12,5 15,6.bmp' to
'Polystyrol, Measurement 1, receeding, L12,5 R15,6.bmp'

To not tinker with the original files, the 'renaming' is done on a copy of
the original data. The results file is called 'results.xls' (a tab
separated text file) and is created in the same directory where the raw
data resides.

This is actually a two step process: first the files are processed and the
future result of the rename is printed to stdout. If no exception occurs,
the renaming of the files is done in a second round.
'''

import os
import shutil
from collections import namedtuple, OrderedDict


# constants
VERBOSE_TEMPLATE = '{i}, {m.type}, L{m.left} R{m.right}'
# pairs of (first letter, verbose type); the order matters, see
# get_measurement_type(). NOTE: the spelling 'receeding' is kept on purpose,
# it ends up in the generated file names.
TYPE_ABBREVIATIONS = [(a[0], a) for a in ['static', 'receeding', 'advancing']]
NAN_STRING = '-'

# constants for excel output
XLS_HEADLINE1 = ('Measurement', 'Position', 'Static', 'Advancing', 'Receeding')
XLS_HEADLINE2 = ('Measurement', '', 'Static', 'Advancing', 'Receeding')
XLS_CELLS = ('=A2', '{h}', '={f}(C2:C{x})', '={f}(D2:D{x})', '={f}(E2:E{x})')
# pairs of (human readable label, name of the spreadsheet function)
XLS_AGGREGATIONS = (
    ('n', 'ANZAHL'),
    ('Mittelwert', 'MITTELWERT'),
    ('Std.Abw.', 'STABW'))

# records the name of a source file
FileInfo = namedtuple('FileInfo', ['path', 'dir', 'name', 'ext'])

# records the data found in the name of a source file
Measurement = namedtuple('Measurement', ['id', 'type', 'left', 'right'])


def rename(rename_func, path='.'):
    ''' renames contact angle source files in a folder

    Hidden entries (names starting with a dot) are ignored. Bitmaps
    ('.bmp') are treated as pictures, '.avi' files as movies.

    rename_func: function that accepts an abbreviated identifier and
                 returns a verbose version
    path:        path to the folder to process
    '''
    entries = sorted(os.listdir(path))
    # os.listdir() only returns bare names; join them with the folder so
    # that folders other than the current working directory work as well
    files = [os.path.join(path, entry)
             for entry in entries
             if not entry.startswith('.')]
    bitmaps = [f for f in files if f.endswith('.bmp')]
    avis = [f for f in files if f.endswith('.avi')]

    # first a test run and if no exception occurs, rename the files
    process_pictures(rename_func, bitmaps, rename=False)
    process_movies(rename_func, avis, rename=False)
    process_pictures(rename_func, bitmaps, rename=True)
    process_movies(rename_func, avis, rename=True)


def process_movies(rename_func, avis, rename=False):
    ''' renames all given movies

    Movie file names only consist of the abbreviated identifier

    rename_func: function that accepts an abbreviated identifier and
                 returns a verbose version
    avis:        list of paths to movie files
    rename:      should the file be renamed or is this a test run
    '''
    for avi in avis:
        source = get_file_info(avi)
        verbose_name = rename_func(source.name)
        mock_or_rename(source, verbose_name, rename)


def process_pictures(rename_func, bitmaps, rename=False):
    ''' renames all given pictures and collects the measured data

    The collected data is written to a results file in the directory of
    the processed pictures. If no pictures are given, nothing is written.

    rename_func: function that accepts an abbreviated identifier and
                 returns a verbose version
    bitmaps:     list of paths to bitmap files
    rename:      should the file be renamed or is this a test run
    '''
    # prepare a container for the measurement results
    measurements = OrderedDict()
    results_dir = None

    for bmp in bitmaps:
        # retrieve the measured value and the verbose identification
        source = get_file_info(bmp)
        measurement = extract_measurement(source.name)
        verbose_id = rename_func(measurement.id)

        # record the measured values, 'left' is always listed before 'right'
        if verbose_id not in measurements:
            measurements[verbose_id] = OrderedDict(
                [('left', {}), ('right', {})])
        sides = measurements[verbose_id]
        sides['left'][measurement.type] = measurement.left
        sides['right'][measurement.type] = measurement.right

        # renaming the file
        verbose_name = VERBOSE_TEMPLATE.format(i=verbose_id, m=measurement)
        mock_or_rename(source, verbose_name, rename)

        # the results file goes next to the pictures
        results_dir = source.dir

    # without any picture there is neither data nor a known directory
    if results_dir is not None:
        write_results(measurements, results_dir)


def extract_measurement(filename):
    ''' extract the abbr. id, type and result of a measurement in a filename

    A measurement is made up from the abbreviated id, type, left and right
    contact angle separated by a space.

    filename: the file name without extension

    returns a Measurement tuple, raises a ValueError if the file name does
    not consist of (at least) four parts or the type is unknown
    '''
    # use rsplit, the raw_id might contain a space itself
    parts = filename.rsplit(' ', 3)
    if len(parts) != 4:
        raise ValueError('Unparsable file name: ' + filename)
    raw_id, raw_type, raw_left, raw_right = parts

    measurement_type = get_measurement_type(raw_type)
    left = get_contact_angle(raw_left)
    right = get_contact_angle(raw_right)
    return Measurement(raw_id, measurement_type, left, right)


def get_measurement_type(abbreviation):
    ''' returns the verbose measurement type

    Measurement types are 'advancing', 'receeding' or 'static'. Mostly they
    will be abbreviated like aca, rca or sca. Since the letter 'a' is
    present in all of them, first test is on 's' then on 'r'.

    abbreviation: the abbreviated measurement type

    raises a ValueError if none of the known letters is found
    '''
    abbr = abbreviation.lower()
    for type_id, measurement_type in TYPE_ABBREVIATIONS:
        if type_id in abbr:
            return measurement_type
    raise ValueError('Unknown measurement type: ' + abbreviation)


def get_contact_angle(raw_angle, decimal_separator=','):
    ''' returns a verified contact angle or a nice NaN representation

    The angle is returned as a string with one decimal place, using the
    given decimal separator.

    raw_angle:         raw notation of the angle value as found in the
                       file name
    decimal_separator: character used as decimal separator. Since I work
                       in Germany, it defaults to a comma
    '''
    # the conversion to and from a float is to verify it's a number
    try:
        angle = float(raw_angle.replace(decimal_separator, '.'))
    except ValueError:
        return NAN_STRING
    angle_as_str = '{:.1f}'.format(angle)
    return angle_as_str.replace('.', decimal_separator)


def get_file_info(path):
    ''' returns a named tuple with path, filename, extension, etc.

    path: path of the file
    '''
    folder = os.path.dirname(path)
    filename_with_ext = os.path.basename(path)
    filename, extension = os.path.splitext(filename_with_ext)
    return FileInfo(path, folder, filename, extension)


def mock_or_rename(source, destination_name, rename=False):
    ''' copies a source file to a new destination name or mocks such a 'rename'

    The new 'renamed' file will be in the same directory as the source
    file. The source file itself is left untouched. In a test run the
    planned operation is only printed to stdout.

    source:           named tuple (FileInfo)
    destination_name: name of the new file without extension
    rename:           is this a 'real rename' or just a test
    '''
    destination_path = os.path.join(source.dir, destination_name + source.ext)
    if rename:
        shutil.copyfile(source.path, destination_path)
    else:
        print('{} -> {}'.format(source.path, destination_path))


def write_results(measurements, source_dir):
    ''' write the results of the measurements in a text file

    The file 'results.xls' is a tab separated text file: one headline, one
    line per measurement and side of the drop, followed by a block of
    spreadsheet formulas aggregating the columns.

    measurements: OrderedDict with the measurement results
    source_dir:   directory to store the results file
    '''
    results_path = os.path.join(source_dir, 'results.xls')
    # spreadsheet row of the last line written, the headline is row 1
    xls_result_line = 1

    with open(results_path, 'w') as filehandle:
        filehandle.write(tabbed_line(*XLS_HEADLINE1))
        for idx, positions in measurements.items():
            for position, values in positions.items():
                s = values.get('static', NAN_STRING)
                a = values.get('advancing', NAN_STRING)
                r = values.get('receeding', NAN_STRING)
                filehandle.write(tabbed_line(idx, position, s, a, r))
                xls_result_line += 1

        # two blank lines and data aggregation fields
        filehandle.write('\n\n')
        filehandle.write(tabbed_line(*XLS_HEADLINE2))
        formular_tpl = tabbed_line(*XLS_CELLS)
        for human, function in XLS_AGGREGATIONS:
            line = formular_tpl.format(
                h=human, f=function, x=xls_result_line)
            filehandle.write(line)


def tabbed_line(*args):
    ''' small helper function, returns the arguments as a tab separated
    string terminated by a newline

    args: the strings to join
    '''
    return '\t'.join(args) + '\n'


if __name__ == '__main__':

    def nice_identity(raw_id):
        ''' translates a three character id: substrate, slide and point '''
        if len(raw_id) != 3:
            raise ValueError('unparsable identity: ' + raw_id)
        substrate, slide, point = raw_id
        if substrate in '125':
            substrate = 'P(DMAA-{}%MABP)'.format(substrate)
        elif substrate in 'ps':
            substrate = 'P(S-5%MABP)'
        else:
            raise ValueError('Unknown raw id: ' + raw_id)
        return '{}, Slide {}, Measurement {}'.format(substrate, slide, point)

    rename(nice_identity)