# Source code for bin.manipulate

# Module authorship metadata. The earlier placeholder value of __author__
# ('martin') was overwritten immediately, so only the final value is kept.
__author__ = "Martin Paul Eve"
__email__ = "martin@martineve.com"

from lxml import etree
import uuid
from copy import deepcopy
import shutil
from lxml import objectify
import re
from debug import Debuggable


class Manipulate(Debuggable):
    """Collection of XML/DOM and text manipulation helpers built on lxml.

    Several methods read attributes that this class never assigns
    (``mod_name``, ``dom_to_load``, ``dom_temp_file``, ``namespaces``);
    presumably subclasses provide them -- verify against the subclasses.
    """

    def __init__(self, gv):
        """Store the global-variables object and register with Debuggable.

        :param gv: object exposing a ``debug`` attribute, which is reused as
            this instance's debug channel.
        """
        self.gv = gv
        self.debug = gv.debug
        # mod_name is not set here; it must already be resolvable on self.
        label = '{0} Manipulator'.format(self.mod_name)
        Debuggable.__init__(self, label)

    @staticmethod
    def set_dom_tree(filename):
        """Parse *filename* and return the resulting lxml tree.

        The parser is created with ``remove_blank_text=True`` and
        ``resolve_entities=False``.
        """
        parser = etree.XMLParser(resolve_entities=False, remove_blank_text=True)
        tree = etree.parse(filename, parser)
        return tree

    @staticmethod
    def set_dom_tree_with_parser(filename, p):
        """Parse *filename* with the caller-supplied parser *p* and return the tree."""
        tree = etree.parse(filename, p)
        return tree

    @staticmethod
[docs]    def update_tmp_file(fr, to):
        shutil.copy2(fr, to)

    @staticmethod
[docs]    def get_file_text(filename):
        f = open(filename)
        text = f.read()
        f.close()
        return text

    @staticmethod
[docs]    def xml_start(tag):
        return '<' + tag + '>'

    @staticmethod
[docs]    def xml_end(tag):
        return '</' + tag + '>'

    @staticmethod
[docs]    def append_safe(base, child, caller):
        try:
            parent = base

            while True:
                parent = parent.getparent()

                if parent is None:
                    break

                if parent is child:
                    if caller is not None:
                        caller.debug.print_debug(caller, u'Aborting append: attempted to add a parent to its own child')
                    return False

            base.append(child)
            return True
        except:
            base.append(child)
            return True

[docs]    def return_elements(self, xpath):
        tree = self.load_dom_read()
        return tree.xpath(xpath, namespaces=self.namespaces)

    @staticmethod
    def search_and_replace_dom(tree, search_section, search_element, surround_with):
        """Wrap the content of matching elements in a new child element.

        For every node matched by the XPath ``.//<search_section>`` in *tree*,
        each descendant found with ``.//<search_element>`` is cleared and given
        a single new child named *surround_with*. The wrapper carries an
        ``order`` attribute (1-based position within its section) and a random
        ``uuid`` attribute.

        When the matched element has children, only a deep copy of its first
        child is placed inside the wrapper; otherwise the element's text is
        moved into the wrapper.

        :param tree: object supporting ``xpath`` whose matches are edited in place.
        :param search_section: XPath fragment selecting the sections to scan.
        :param search_element: path fragment selecting elements inside a section.
        :param surround_with: tag name of the wrapper element to create.
        :return: the same *tree* object.
        """
        for section in tree.xpath(".//" + search_section):
            matches = section.findall(".//" + search_element)
            for order, result in enumerate(matches, 1):
                wrapper = etree.Element(surround_with)
                wrapper.set("order", str(order))
                wrapper.set("uuid", str(uuid.uuid4()))

                if len(result) > 0:
                    # Copy before clear(), which removes the children.
                    first_child = deepcopy(result[0])
                    result.clear()
                    # The base must be the wrapper element itself. Passing its
                    # bound ``append`` method (as was done before) raised
                    # AttributeError, because a method has neither getparent()
                    # nor append().
                    Manipulate.append_safe(wrapper, first_child, None)
                else:
                    wrapper.text = result.text
                    result.clear()

                Manipulate.append_safe(result, wrapper, None)
        return tree

    @staticmethod
[docs]    def write_output(f, text):
        out = open(f, 'w')
        out.write(text)
        out.close()

    # Returns the value after a searching a list of regex or None if nothing found.
    @staticmethod
[docs]    def try_list_of_regex(file_string, *regex):
        if len(regex) > 0:
            for i in regex:
                val = re.findall(file_string, i)
                if val:
                    return val
            return None
        else:
            return None

    @staticmethod
[docs]    def get_stripped_text(element):
        text = element.text

        if text is None:
            text = ''

        for sub_element in element:
            text += ' ' + Manipulate.get_stripped_text(sub_element)

            if not sub_element.tail is None:
                text += sub_element.tail

        return text

    @staticmethod
    def replace_value_of_tag(text, new_value):
        """Set the ``teiHeader/fileDesc/titleStmt/title`` text and re-serialise.

        *text* is parsed with ``lxml.objectify``; the title element reached via
        that attribute path gets *new_value* as its text, and the whole
        document tree is returned serialised with ``encoding="unicode"``.
        """
        root = objectify.fromstring(text)
        title = root.teiHeader.fileDesc.titleStmt.title
        # noinspection PyProtectedMember
        title._setText(new_value)
        document = root.getroottree()
        return etree.tostring(document, encoding="unicode")

[docs]    def load_dom_read(self):
        # load the DOM for read only access
        parser = etree.XMLParser(recover=True)
        tree = self.set_dom_tree_with_parser(self.dom_to_load, parser)
        return tree

[docs]    def load_dom_tree(self):
        # load the DOM
        self.update_tmp_file(self.dom_to_load, self.dom_temp_file)
        tree = self.set_dom_tree(self.dom_temp_file)
        return tree

    # replaces a given tag with a list of replace tags
[docs]    def replace(self, text, tag, *params):
        replace_start = ''
        replace_end = ''

        if len(params) > 0:
            for i in params:
                replace_start += self.xml_start(i)
                replace_end += self.xml_end(i)

            text = text.replace(self.xml_start(tag), replace_start).replace(self.xml_end(tag), replace_end)

        else:
            self.debug.print_debug(self, "No parameters passed to replace function")
        return text