# Source code for bin.bibliographyclassifier

#!/usr/bin/env python
"""bibliographyclassifier.py: a tool to manipulate reference lists

Usage:
    bibliographyclassifier.py confirm <input> [options]

Options:
    -d, --debug                                     Enable debug output
    --interactive                                   Prompt the user to assist in interactive tagging
    -h, --help                                      Show this screen.
    -v, --version                                   Show version.
"""

from teimanipulate import *
from nlmmanipulate import *
from docopt import docopt
from bare_globals import GV
from interactive import Interactive

# Module authorship metadata (stray "[docs]" viewcode markers removed so these are valid assignments).
__author__ = "Martin Paul Eve"
__email__ = "martin@martineve.com"

"""
A class that assists with bibliography classification.
"""

from debug import Debuggable
import codecs


class BibliographyClassifier(Debuggable):
    """Locate bibliography entries and let the user confirm or reject them.

    ``run`` operates on the tree loaded through ``TeiManipulate``; ``run_prompt`` operates on the tree loaded
    through ``NlmManipulate``.
    """

    def __init__(self, global_variables):
        """Keep a handle on the shared globals and initialise the ``Debuggable`` base.

        :param global_variables: object exposing at least ``debug``, ``settings`` and ``script_dir``
        """
        self.gv = global_variables
        self.debug = self.gv.debug
        Debuggable.__init__(self, 'Bibliography Classifier')

    def linguistic_cues(self, manipulate, tree):
        """Search for the reference list using per-language cue phrases.

        For every language named in the comma-separated ``reference-languages`` setting, each non-blank line of
        ``<script_dir>/language/ref_marker_<language>.txt`` is passed to ``manipulate.find_references_from_cue``.

        :param manipulate: object providing ``find_references_from_cue(cue, tree)``
        :param tree: document tree handed through unchanged to ``manipulate``
        :return: True as soon as ``find_references_from_cue`` returns a truthy value, otherwise False
        """
        self.debug.print_debug(self, u'Using linguistic cue method to classify bibliography')
        language_setting = self.gv.settings.get_setting('reference-languages', self)

        for language in language_setting.split(','):
            # tolerate "en, de" style values and stray commas instead of building a bogus file name
            language = language.strip()
            if not language:
                continue

            marker_path = '{0}/language/ref_marker_{1}.txt'.format(self.gv.script_dir, language)

            with codecs.open(marker_path, encoding='utf-8') as lang_file:
                cues = lang_file.read().split('\n')

            for cue in cues:
                if cue.strip() != '' and manipulate.find_references_from_cue(cue, tree):
                    return True

        # explicit negative result rather than an implicit None
        return False

    def run(self):
        """Tag the bibliography in the TEI tree when the aggression level permits.

        Returns immediately when the ``--aggression`` argument is lower than the ``bibliographyclassifier``
        setting of the ``aggression`` domain. Otherwise the reference list is looked up in the word list first,
        linguistic cues are tried only if that fails, and ``enclose_bibliography_tags`` is then called for
        ``//tei:p[@rend="Bibliography"]`` regardless of whether either search succeeded.
        """
        aggression = int(self.gv.settings.args['--aggression'])
        threshold = int(self.gv.settings.get_setting('bibliographyclassifier', self, domain='aggression'))

        if aggression < threshold:
            # NOTE(review): the message says 4 but the threshold is read from settings - confirm they agree
            self.debug.print_debug(self, u'Aggression level less than 4: exiting module.')
            return

        tei_manipulator = TeiManipulate(self.gv)
        tree = tei_manipulator.load_dom_tree()

        # the cue-based search is only a fallback; its result is not needed afterwards
        if not tei_manipulator.find_reference_list_in_word_list(tree):
            self.linguistic_cues(tei_manipulator, tree)

        tei_manipulator.enclose_bibliography_tags('//tei:p[@rend="Bibliography"]', 'back', 'div', 'type', 'bibliogr')

    def unconfirm(self, p, tree):
        """Retag a rejected reference as ``p`` and move it to a reference point in the document.

        The reference point is the first element carrying a ``meTypesetRender`` attribute. If that attribute is
        ``ref-list-parent`` the element is appended to it with ``Manipulate.append_safe``; for any other value
        it is placed with ``addnext``. When no such element exists, the first result of ``//sec[last()]`` is
        used as the parent. If that query is empty as well, the element is left untouched.

        :param p: the element to unconfirm
        :param tree: tree that is queried for the reference point
        """
        marked = tree.xpath('//*[@meTypesetRender]')

        if marked:
            anchor = marked[0]
            # distinguishes ref-list-parent (append inside) from ref-list-before (insert after)
            as_child = anchor.attrib['meTypesetRender'] == 'ref-list-parent'
        else:
            # get parent (ref-list)'s parent (back)'s previous sibling (sec)
            # NOTE(review): '//sec[last()]' matches the last sec of *each* parent, so [0] is the first such
            # match in document order - confirm this is the intended section when sections are nested
            sections = tree.xpath('//sec[last()]')

            if not sections:
                # previously an IndexError; without a reference point there is nowhere to move the element
                self.debug.print_debug(self, u'No reference point found: leaving element in place')
                return

            anchor = sections[0]
            as_child = True

        p.tag = 'p'

        if as_child:
            Manipulate.append_safe(anchor, p, self)
        else:
            anchor.addnext(p)

    def handle_input(self, manipulate, opts, p, prompt, sel, tree, text):
        """Apply one menu selection to one reference element.

        :param manipulate: unused here; kept for the existing call signature
        :param opts: unused here; kept for the existing call signature
        :param p: the reference element the selection applies to
        :param prompt: object whose ``print_`` is used to report an abort
        :param sel: selection key: 'a' abort, 'c' confirm, 'o' confirm all, 'u' unconfirm, 'n' unconfirm all
        :param tree: tree passed on to ``unconfirm``
        :param text: text of the reference, used in the debug message
        :return: "abort", "confirmall" or "delall" when the caller has to change state, otherwise None
        """
        if sel == 'a':
            prompt.print_(u"Leaving interactive mode on user command")
            return "abort"

        if sel == 'o':
            # confirm all: the current element is kept as it is
            return "confirmall"

        if sel in ('u', 'n'):
            self.debug.print_debug(self, u'Unconfirming reference {0}'.format(text))
            self.unconfirm(p, tree)

            if sel == 'n':
                # unconfirm all: the caller applies 'u' to every remaining element
                return "delall"

        # 'c' (confirm), a plain 'u' and unrecognised keys need no follow-up from the caller
        return None

    def run_prompt(self, interactive):
        """Ask the user to confirm or reject every ``ref`` element of the NLM tree.

        After "confirm all" or "unconfirm all" the remaining elements are processed without prompting. The tree
        is saved with ``save_tree`` once all elements are handled; an abort returns without saving.

        :param interactive: truthy when interactive mode was requested; otherwise a fatal error is reported
        """
        if not interactive:
            self.debug.fatal_error(self, 'Cannot enter confirmation mode without interactive flag')
            # never fall through into prompting, even if fatal_error were to return
            return

        prompt = Interactive(self.gv)
        opts = ('Confirm', 'Unconfirm', 'cOnfirm all', 'uNconfirm all', 'Abort')

        manipulate = NlmManipulate(self.gv)
        tree = manipulate.load_dom_tree()

        delete_all = False
        confirm_all = False

        for p in tree.xpath('//ref'):
            text = manipulate.get_stripped_text(p)

            if delete_all:
                sel = 'u'
            elif confirm_all:
                sel = 'c'
            else:
                prompt.print_(u"Please confirm whether the following is a bibliographic reference: {0}".format(text))
                sel = prompt.input_options(opts)

            result = self.handle_input(manipulate, opts, p, prompt, sel, tree, text)

            if result == 'abort':
                return

            if result == 'delall':
                delete_all = True
            elif result == 'confirmall':
                confirm_all = True

        manipulate.save_tree(tree)


def main():
    """Command-line entry point.

    Parses the command line against the module docstring with ``docopt``, builds a ``GV`` from the parsed
    arguments, enables debug output when ``--debug`` is given and, for the ``confirm`` command, starts the
    confirmation prompt with the value of ``--interactive``.
    """
    args = docopt(__doc__, version='meTypeset 0.1')
    bare_gv = GV(args)

    if args['--debug']:
        bare_gv.debug.enable_debug()

    # built unconditionally, as before, so construction does not depend on the chosen command
    bc_instance = BibliographyClassifier(bare_gv)

    if args['confirm']:
        bc_instance.run_prompt(args['--interactive'])

# Run the command-line interface only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()