Source code for heimpt.heimpt

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Main program which initializes the Monograph Publication Tool and reads the configuration from the json file.
The configuration json file is organized into projects and typesetters. Each project may contain an ordered set of files and
an ordered subset of typesetters. Typesetter and file arguments can be specifically configured in a pre-defined order.
A specific project can be configured to run in chain mode, which takes the output of the previous typesetter as the input for
the current typesetter. If chaining is not set, all the typesetters take the set of files, execute the typesetter and
generate the output.


Usage:
    heimpt.py <config_file> [options]
    heimpt.py import omp [-t <template_file>] (<submission_id> ... | -a)

General Options:
    --interactive      Enable step-by-step interactive mode
    -d --debug         Enable debug output
    -h --help          Display help and quit

import omp Options:
    -t --template=<template_file>
    -a --all-submissions          Import all submissions of any configured presses

Example
--------

python $BUILD_DIR/heimpt.py $BUILD_DIR/configurations/example.json
python $BUILD_DIR/heimpt.py import omp 48
python $BUILD_DIR/heimpt.py import omp -a

Notes
-------
This program may be used to consolidate output files, generated from a certain tool.  But a consolidation tool should
be set as the last tool in a process chain.



References
----------
* Web : https://github.com/withanage/heimpt
* Repository and issue-tracker: https://github.com/withanage/heimpt/issues
* Licensed under terms of GPL 3  license (LICENSE.md)


"""


[docs]__author__ = "Dulip Withanage"
import collections import datetime from debug import Debuggable, Debug from docopt import docopt from globals import GV import os from settingsconfiguration import Settings from subprocess import Popen, PIPE import sys import shutil import uuid import inspect
[docs]SEP = os.path.sep
class MPT(Debuggable):
    """
    Main application object of heiMPT.

    Holds the parsed command-line arguments, the settings and global
    variables derived from them, and the methods that run the configured
    typesetters for every project.
    """

    def __init__(self):
        # The command line is parsed first because Settings is built from the parsed arguments.
        self.args = self.read_command_line()
        self.debug = Debug()
        self.settings = Settings(self.args)
        self.gv = GV(self.settings)
        Debuggable.__init__(self, 'Main')
        if self.args.get('--debug'):
            self.debug.enable_debug()
        # Name of the result folder of this run: timestamp plus the first four characters of a random uuid.
        self.current_result = datetime.datetime.now().strftime(
            "%Y_%m_%d-%H-%M-%S-") + str(uuid.uuid4())[:4]
        # Both are filled in by main() after the json configuration has been read.
        self.config = None
        self.all_typesetters = None
        # Directory of this script; run_modules() looks up the import plugins relative to it.
        self.script_folder = os.path.dirname(os.path.realpath(__file__))

    @staticmethod
    def read_command_line():
        """
        Reads and generates a docopt dictionary from the command line parameters.

        The module docstring is used as the docopt usage description.

        Returns
        -------
        docopt : dictionary
          A dictionary, where keys are names of command-line elements and values are the parsed values of those elements.
        """
        return docopt(__doc__, version='heiMPT 0.0.1')
[docs] def get_module_name(self): """ Reads the name of the module for debugging and logging Returns ------- name string Name of the Module """ name = 'heiMPT' return name
[docs] def call_typesetter(self, args): """Runs typesetter with given arguments Creates the execution path for a typesetter or an application and runs it as a system process. Output, exit-code and system error codes are captured and returned. Parameters ---------- args : list application arguments in the correct oder. Returns ------- output :str system standard output. err :str system standard error. exit_code: str system exit_code. See Also -------- subprocess.Popen() """ args_str = ' '.join(args) if ': ' in args_str: args_str = args_str.replace(': ', ':') self.debug.print_debug( self, "Merging command: file into command:file, can be a problem for some applications") m = args_str.strip().split(' ') process = Popen(m, stdout=PIPE) output, err = process.communicate() exit_code = process.wait() return output, err, exit_code
[docs] def arguments_parse(self, t_props): """ Reads typesetter properties from json configuration and create arguments. Parameters ---------- t_props : dictionary typesetter properties Returns ------- args : list application execution path and arguments in the correct oder. """ args = [] if t_props.get('executable'): args = [t_props.get('executable')] else: self.debug.print_debug( self, self.gv.TYPESETTER_EXECUTABLE_VARIABLE_IS_UNDEFINED) sys.exit(1) arguments = t_props.get("arguments") if arguments: arguments = collections.OrderedDict(sorted(arguments.items())) for a in arguments: args.append(arguments[a]) return args
[docs] def create_output_path( self, p, p_id, args, prefix, uid): """ Creates the output path for the current file Output folder is constructed using project_name, current_time, sequence number of the current typesetter and the sequence number of the current file. Parameters --------- p: dictionary json program properties p_id: int typesetter id args : list application arguments in the correct oder. prefix: str file name prefix of the current file uid: str unique id of the current current typesetter Returns -------- True: boolean Returns True if the output file is created See Also -------- os.makedirs() """ config_args = p.get('typesetters')[p_id].get("arguments") if config_args is None: self.debug.print_debug( self, self.gv.TYPESETTER_ARGUMENTS_NOT_DEFINED) sys.exit(1) ts_args = collections.OrderedDict( sorted(config_args.items())) out_type = p.get('typesetters')[p_id].get("out_type") out_path = os.path.join(p.get('path'), uid) for i in ts_args: arg = ts_args[i] if arg == '--create-dir': args.append(out_path) else: args.append(arg) self.debug.print_debug( self, '{} {}'.format('Execute', ' '.join(args))) return True
[docs] def run_typesetter(self, p, pre_path, pre_out_type, p_id, uid, f_id, f_name, args): """ Creates the temporary output path, calls the typesetter and writes the outtput to the correct path for a certain file Parameters --------- p: dictionary json program properties pre_path: str project path of the previous iteration pre_out_type : str output type of the previous iteration p_id: int typesetter id uid: str unique id of the current current typesetter f_id: int sequence number of the current file f_name: str name of the current file args : list application arguments in the correct oder. Returns -------- p_path : str project output path of the current typesetter pf_type : str project file type of the current typesetter See Also -------- call_typesetter, organize_output """ p_path = '' pf_type = '' prefix = f_name.split('.')[0] if p_id == min(i for i in p['typesetters']): f_path = os.path.join(p.get('path'), f_name) elif p.get("chain"): f_path = os.path.join(pre_path, prefix + '.' + pre_out_type) if os.path.isfile(f_path) or p['typesetters'].get(p_id).get('expand'): self.debug.print_console(self, '\t{}:\t {} '.format('Processing', prefix)) self.gv.log.append(prefix) args.append(f_path) self.create_output_path(p, p_id, args, prefix, uid) output, err, exit_code = self.call_typesetter(args) self.debug.print_debug(self, output.decode('utf-8')) p_path = self.organize_output( p, p_id, prefix, f_id, uid, args) pf_type = p.get('typesetters')[p_id].get("out_type") else: self.debug.print_debug( self, self.gv.PROJECT_INPUT_FILE_DOES_NOT_EXIST + ' ' + os.path.join(f_path)) return p_path, pf_type
[docs] def typeset_file(self, p, pre_path, pre_out_type, p_id, uid, f_id, f_name): """ Typesets the current file Parameters --------- p: dictionary json program properties pre_path: str project path of the previous iteration pre_out_type : str output type of the previous iteration p_id: int typesetter id uid: str unique id of the current current typesetter f_id: int sequence number of the current file f_name: str name of the current file args: list application arguments in the correct oder. Returns -------- p_path : str project output path of the current typesetter pf_type : str project file type of the current typesetter See Also -------- run_typesetter """ t_props = self.all_typesetters.get( p.get('typesetters')[p_id].get("name")) p_path, pf_type = '', '' if t_props: mt = self.arguments_parse(t_props) if self.gv.check_program(t_props.get('executable')): p_path, pf_type = self.run_typesetter( p, pre_path, pre_out_type, p_id, uid, f_id, f_name, mt) else: self.debug.print_debug( self, t_props.get('executable')+self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE) else: self.debug.print_debug( self, self.gv.PROJECT_TYPESETTER_IS_NOT_AVAILABLE) return p_path, pf_type
[docs] def typeset_files(self, p, pre_path, pre_out_type, pre_id): """ Typeset all files of a certain project Parameters --------- p: dictionary json program properties pre_path: str project path of the previously executed typesetter pre_out_type: str project file type of the previously executed typesetter pre_id :int sequence number of the previously executed file Returns -------- p_path : str project output path of the current typesetter pf_type : str project file type of the current typesetter See Also -------- typeset_file """ p_path, pf_type = '', '' uid = str(uuid.uuid4()) project_files = collections.OrderedDict( sorted((int(key), value) for key, value in list(p.get('files').items()))) if p.get('typesetters')[pre_id].get("expand"): f_name = self.gv.uuid p_path, pf_type = self.typeset_file( p, pre_path, pre_out_type, pre_id, uid, 0, f_name ) else: for f_id in project_files: f_name = project_files[f_id] p_path, pf_type = self.typeset_file( p, pre_path, pre_out_type, pre_id, uid, f_id, f_name ) return p_path, pf_type
[docs] def typeset_project(self, p): """ Typesets a certain project Parameters --------- p: dictionary json program properties Returns -------- True: boolean Returns True, if all the typesetters in project has run successfully. See Also -------- typeset_files """ typesetters_ordered, temp_path, temp_pre_out_type = '', '', '' pre_path = '' prev_out_type = '' if p.get('active'): self.debug.print_console(self, 'PROJECT : ' + p.get('name')) self.gv.log.append(p.get("name")) ts = p.get('typesetters') if ts: typesetters_ordered = collections.OrderedDict( sorted(ts.items())) else: self.debug.print_debug( self, self.gv.PROJECT_TYPESETTERS_ARE_NOT_SPECIFIED) if self.all_typesetters is None: self.debug.print_debug( self, self.gv.PROJECT_TYPESETTER_VAR_IS_NOT_SPECIFIED) sys.exit(1) for p_id in typesetters_ordered: self.debug.print_console(self, ' '.join( ['Step', p_id, ':', '\t', p.get('typesetters')[p_id].get("name")])) self.gv.log.append('{} {}'.format(p_id, p.get('typesetters')[p_id].get("name"))) temp_path, temp_pre_out_type = self.typeset_files( p, pre_path, prev_out_type, p_id ) pre_path = temp_path prev_out_type = temp_pre_out_type else: self.debug.print_debug( self, self.gv.PROJECT_IS_NOT_ACTIVE + ' ' + p.get('name')) return True
[docs] def typeset_all_projects(self): """ Typeset all projects defined in the json file Returns -------- True: boolean Returns True, if the all the typesetters in project run See Also -------- typeset_project """ projects = self.config.get('projects') if projects: for p in projects: self.typeset_project(p) else: self.debug.print_debug(self, self.gv.PROJECTS_VAR_IS_NOT_SPECIFIED) return True
[docs] def organize_output(self, p, p_id, prefix, f_id, uid ,args): """ Copy the temporary results into the final project path This method reads the temporary results of the current typesetter step and copies them in to the correct output folder. Output folder is constructed using project_name, current_time, sequence number of the current typesetter and the sequence number of the current file. Customized tool specific actions are also defined and handled here. Parameters ------------ p: dict json program properties p_id: int typesetter id prefix: str file name prefix of the current file f_id: int sequence number of the current file uid: str unique id of the current current typesetter args: bytearray tool parameters , executable file is first element Returns -------- project_path: str Final path for the current file See Also -------- create_merged_file, gv.create_dirs_recursive """ p_name = p.get('typesetters')[p_id].get("name") t_path = [p.get('path'), uid] if args: if len([arg for arg in args if 'meTypeset.py' in arg]) > 0: t_path += ['nlm'] else: t_path += [p.get('path'), uid] out_type = p['typesetters'][p_id].get('out_type') if out_type is None: self.debug.print_console( self, self.gv.PROJECT_OUTPUT_FILE_TYPE_IS_NOT_SPECIFIED) sys.exit(1) project_path = [p.get('path'), p['name'], self.current_result, p_id + '_' + p_name, out_type] temp_dir = os.path.join(p.get('path'), uid) if p['typesetters'][p_id].get('merge'): self.create_merged_file(p, p_id, project_path, t_path) if len(list(p.get('files').items())) == f_id: shutil.rmtree(temp_dir) elif p['typesetters'][p_id].get('expand'): for filename in os.listdir(temp_dir): p_path = self.gv.create_dirs_recursive(project_path) f_path = '{}{}{}'.format(p_path, SEP, filename) os.rename(os.path.join(temp_dir, filename), f_path) shutil.rmtree(temp_dir) elif p['typesetters'][p_id].get('process'): if p_name.lower() == 'metypeset' and not os.path.exists(SEP.join(t_path)): t_path.append('nlm') t_path.append(prefix + '.' 
+ out_type) p_path = self.gv.create_dirs_recursive(project_path) f_path = '{}{}{}.{}'.format(p_path, SEP, prefix, out_type) try: os.rename(SEP.join(t_path), f_path) shutil.rmtree(temp_dir) except FileNotFoundError: print('File not found\t{}',SEP.join(t_path)) sys.exit(1) else: self.debug.print_debug( self, self.gv.PROJECT_TYPESETTER_PROCESS_METHOD_NOT_SPECIFIED) if len(list(p.get('typesetters').items())) == int(p_id) and int(f_id) == len(list(p.get('files').items())): zip_path = ''.join([p.get('path'),SEP, p['name']]) shutil.make_archive('{}/{}'.format(zip_path, p.get("name")),'zip', zip_path) return SEP.join(project_path)
[docs] def create_merged_file(self, p, p_id, project_path, t_path): """ Create a combined file from a set of input files Parameters ------------ p: dict json program properties p_id: int typesetter id t_path : str temporary output directory project_path : str system path to be created See Also -------- create_named_file() """ t_path.append(self.gv.uuid) p_path = self.gv.create_dirs_recursive(project_path) f_path = '{}{}{}.xml'.format(p_path, SEP, self.gv.uuid) shutil.copy2(SEP.join(t_path), f_path) self.create_named_file(p, p_id, p_path, t_path) return f_path
[docs] def create_named_file(self, p, p_id, p_path, t_path,): """ Copy unique file name to a named file p: dict json program properties p_id: int typesetter id t_path : str temporary output directory p_path : str output directory for the current typesetter """ f = p['typesetters'][p_id].get('out_file') if f: shutil.copy2(SEP.join(t_path), '{}{}{}'.format(p_path, SEP, f)) return
[docs] def run_modules(self): """ Run MPT in module mode """ # Run import modules if self.args.get('import'): sys.path.insert(0, os.path.join(self.script_folder, 'plugins', 'import')) import ImportInterface if self.args.get('omp'): m = "omp" plugin_package = __import__(m, fromlist=['*']) plugin_module = getattr(plugin_package, m) # Find class inheriting form Import abstract class in the module for name in dir(plugin_module): candidate = getattr(plugin_module, name) if inspect.isclass(candidate)\ and issubclass(candidate, ImportInterface.Import)\ and candidate is not ImportInterface.Import: plugin_class = candidate print(("Found import plugin", name, plugin_class)) plugin = plugin_class() self.debug.print_console(self, str(self.args)) plugin.run(self.args, {'base-path': self.script_folder}) # try: # plugin_module = __import__(m) # plugin_module.plugin.run() # except Exception as e: # print('{} {}: {}'.format(m, 'method import failed', e)) # sys.exit(0) else: self.debug.fatal_error(self, "Unsupported arguments: " + self.args) return
[docs] def check_applications(self): """ Check if program binaries are available """ ps = self.config.get('projects') psf = [s for s in ps if s.get('active') == True] ts = self.config.get('typesetters') for p in [ts[i]['arguments'] for i in ts]: for k in [j for j in list(p.values()) if j.find('--formatter') == 0]: for l in k.split('=')[1].split(','): if not self.gv.check_program(self.gv.apps.get(l.lower())): self.debug.fatal_error(self, '{} {}'.format(self.gv.apps.get( l.lower()), self.gv.apps.get(l.lower())+self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE)) sys.exit(1) for p in [ts[i]['executable'] for i in ts]: if not self.gv.check_program(p): self.debug.fatal_error(self, '{} {}'.format( p, self.gv.apps.get(l.lower())+self.gv.TYPESETTER_BINARY_IS_UNAVAILABLE)) sys.exit(1)
def main():
    """
    main method, initializes the Monograph Publication Tool and runs the configuration

    In import mode the import modules are run. Otherwise the json configuration file
    given on the command line is read and all projects are typeset. If the configuration
    cannot be read, 'JSONError' is printed, the cause is written to standard error and
    the program exits with status 1.

    See Also
    --------
    run_modules, typeset_all_projects
    """
    pi = MPT()
    if pi.args['import']:
        pi.run_modules()
        return

    try:
        pi.config = pi.gv.read_json(pi.args['<config_file>'])
    except Exception as err:
        # A bare except would also swallow SystemExit and KeyboardInterrupt.
        print('JSONError')
        sys.stderr.write('{}\n'.format(err))
        sys.exit(1)

    pi.all_typesetters = pi.config.get('typesetters')
    pi.typeset_all_projects()


if __name__ == '__main__':
    main()