# Source code for bin.manipulate

# Module metadata.
__author__ = "Martin Paul Eve"
__email__ = "martin@martineve.com"
import re
import shutil
import uuid
from copy import deepcopy

from lxml import etree
from lxml import objectify

from debug import Debuggable
class Manipulate(Debuggable):
    """Helpers for loading, querying and rewriting XML documents.

    The instance methods read ``self.mod_name``, ``self.dom_to_load``,
    ``self.dom_temp_file`` and ``self.namespaces``. None of these is assigned
    in this class, so they are presumably provided by subclasses -- confirm
    against the concrete manipulators before instantiating this class
    directly.
    """

    def __init__(self, gv):
        """Store the settings object and initialise the debugging base class.

        :param gv: settings object; only its ``debug`` attribute is read here.
        """
        self.gv = gv
        self.debug = self.gv.debug
        Debuggable.__init__(self, '{0} Manipulator'.format(self.mod_name))

    @staticmethod
    def set_dom_tree(filename):
        """Parse ``filename`` into an lxml tree.

        The parser drops ignorable blank text and does not resolve entities.

        :param filename: path (or file-like object) handed to ``etree.parse``.
        :return: the parsed element tree.
        """
        parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
        return etree.parse(filename, parser)

    @staticmethod
    def set_dom_tree_with_parser(filename, p):
        """Parse ``filename`` into an lxml tree using the supplied parser.

        :param filename: path (or file-like object) handed to ``etree.parse``.
        :param p: the lxml parser instance to use.
        :return: the parsed element tree.
        """
        return etree.parse(filename, p)

    @staticmethod
    def update_tmp_file(fr, to):
        """Copy file ``fr`` to ``to`` (contents and metadata, via ``copy2``)."""
        shutil.copy2(fr, to)

    @staticmethod
    def get_file_text(filename):
        """Return the entire contents of ``filename`` as text.

        :param filename: path of the file to read.
        :return: the file contents.
        """
        # The context manager closes the handle even if read() raises.
        with open(filename) as handle:
            return handle.read()

    @staticmethod
    def xml_start(tag):
        """Return the opening markup for ``tag``, e.g. ``'<p>'``."""
        return '<' + tag + '>'

    @staticmethod
    def xml_end(tag):
        """Return the closing markup for ``tag``, e.g. ``'</p>'``."""
        return '</' + tag + '>'

    @staticmethod
    def append_safe(base, child, caller):
        """Append ``child`` to ``base`` unless ``child`` is an ancestor of it.

        :param base: element that receives the new child.
        :param child: element to append.
        :param caller: object whose ``debug.print_debug`` is used to report an
            aborted append, or ``None`` to stay silent.
        :return: ``True`` if the child was appended, ``False`` if the append
            was refused because ``child`` is an ancestor of ``base``.
        """
        would_cycle = False

        try:
            ancestor = base.getparent()
            while ancestor is not None:
                if ancestor is child:
                    would_cycle = True
                    break
                ancestor = ancestor.getparent()
        except AttributeError:
            # Best effort: ``base`` (or one of its ancestors) does not offer
            # getparent(), so the ancestry check cannot be completed; fall
            # through and attempt the append anyway, as before.
            pass

        if would_cycle:
            if caller is not None:
                caller.debug.print_debug(caller, u'Aborting append: attempted to add a parent to its own child')
            return False

        base.append(child)
        return True

    def return_elements(self, xpath):
        """Evaluate ``xpath`` against a freshly loaded read-only DOM.

        :param xpath: XPath expression, resolved with ``self.namespaces``.
        :return: whatever ``tree.xpath`` returns for the expression.
        """
        tree = self.load_dom_read()
        return tree.xpath(xpath, namespaces=self.namespaces)

    @staticmethod
    def search_and_replace_dom(tree, search_section, search_element, surround_with):
        """Wrap the content of matching elements in a new numbered element.

        For every ``search_section`` below ``tree``, each ``search_element``
        descendant is emptied and given a single new ``surround_with`` child
        carrying an ``order`` attribute (1-based, restarting per section) and
        a random ``uuid`` attribute.  The new child receives a copy of the
        match's first child element if it had any, otherwise the match's text.

        :param tree: lxml tree or element to search; modified in place.
        :param search_section: path fragment of the sections to scan.
        :param search_element: path fragment of the elements to wrap.
        :param surround_with: tag name of the wrapper element.
        :return: the same ``tree`` object.
        """
        for section in tree.xpath(".//" + search_section):
            matches = section.findall(".//" + search_element)

            for order, result in enumerate(matches, start=1):
                new_elem = etree.Element(surround_with)
                new_elem.set("order", str(order))
                new_elem.set("uuid", str(uuid.uuid4()))

                if len(result) > 0:
                    # Only the first child element is carried over; clear()
                    # discards the rest of the original content.
                    elem_copy = deepcopy(result[0])
                    result.clear()
                    # Pass the element itself: handing over the bound method
                    # ``new_elem.append`` made this call fail.
                    Manipulate.append_safe(new_elem, elem_copy, None)
                else:
                    new_elem.text = result.text
                    result.clear()

                Manipulate.append_safe(result, new_elem, None)

        return tree

    @staticmethod
    def write_output(f, text):
        """Write ``text`` to the file at path ``f``, replacing its contents."""
        # The context manager closes the handle even if write() raises.
        with open(f, 'w') as out:
            out.write(text)

    @staticmethod
    def try_list_of_regex(file_string, *regex):
        """Return the matches of the first pattern that finds anything.

        :param file_string: the text to search.
        :param regex: patterns to try, in order.
        :return: the ``re.findall`` result of the first pattern that matches,
            or ``None`` if no pattern matches or no pattern was given.
        """
        for pattern in regex:
            # re.findall takes the pattern first and the subject second.
            matches = re.findall(pattern, file_string)
            if matches:
                return matches

        return None

    @staticmethod
    def get_stripped_text(element):
        """Return the text of ``element`` and its descendants without markup.

        A single space is inserted before the text of each child element;
        tail text following a child is appended unchanged.

        :param element: the element whose text is collected.
        :return: the concatenated text (empty string if there is none).
        """
        text = element.text
        if text is None:
            text = ''

        for sub_element in element:
            text += ' ' + Manipulate.get_stripped_text(sub_element)

            if sub_element.tail is not None:
                text += sub_element.tail

        return text

    @staticmethod
    def replace_value_of_tag(text, new_value):
        """Set the text of ``teiHeader/fileDesc/titleStmt/title`` in ``text``.

        :param text: XML source, parsed with ``lxml.objectify``.
        :param new_value: the new text for the title element.
        :return: the serialised document as a unicode string.
        """
        obj = objectify.fromstring(text)
        # noinspection PyProtectedMember
        obj.teiHeader.fileDesc.titleStmt.title._setText(new_value)
        return etree.tostring(obj.getroottree(), encoding="unicode")

    def load_dom_read(self):
        """Load ``self.dom_to_load`` for read-only access.

        Uses a parser in recovery mode, so malformed input is tolerated.

        :return: the parsed element tree.
        """
        parser = etree.XMLParser(recover=True)
        return self.set_dom_tree_with_parser(self.dom_to_load, parser)

    def load_dom_tree(self):
        """Copy ``self.dom_to_load`` to ``self.dom_temp_file`` and parse the copy.

        :return: the element tree parsed from the temporary file.
        """
        self.update_tmp_file(self.dom_to_load, self.dom_temp_file)
        return self.set_dom_tree(self.dom_temp_file)

    def replace(self, text, tag, *params):
        """Replace every ``<tag>``/``</tag>`` pair in ``text`` with other tags.

        Works on the raw string, so only attribute-less tags written exactly
        as ``<tag>`` and ``</tag>`` are affected.

        :param text: the markup to rewrite.
        :param tag: name of the tag to replace.
        :param params: replacement tag names, outermost first.
        :return: the rewritten text, or ``text`` unchanged (with a debug
            message) when no replacement tags are given.
        """
        if not params:
            self.debug.print_debug(self, "No parameters passed to replace function")
            return text

        replace_start = ''.join(self.xml_start(name) for name in params)
        # Close in reverse order so the replacement tags nest properly
        # (<a><b>...</b></a> rather than <a><b>...</a></b>).
        replace_end = ''.join(self.xml_end(name) for name in reversed(params))

        return text.replace(self.xml_start(tag), replace_start).replace(self.xml_end(tag), replace_end)