# Source code for bin.manipulate
# Module metadata. The earlier placeholder value 'martin' was immediately
# overwritten by the full name below, so only the effective value is kept.
__author__ = "Martin Paul Eve"
__email__ = "martin@martineve.com"
from lxml import etree
import uuid
from copy import deepcopy
import shutil
from lxml import objectify
import re
from debug import Debuggable
class Manipulate(Debuggable):
    """Shared helpers for loading, querying and rewriting XML with lxml.

    Several attributes are read here but never assigned in this class:
    ``mod_name`` (``__init__``), ``namespaces`` (``return_elements``),
    ``dom_to_load`` (``load_dom_read`` / ``load_dom_tree``) and
    ``dom_temp_file`` (``load_dom_tree``).
    NOTE(review): presumably these are provided by subclasses -- confirm
    against the concrete manipulator classes.
    """

    def __init__(self, gv):
        """Keep a reference to the settings object and register the debug name.

        :param gv: object exposing a ``debug`` attribute; stored as
            ``self.gv`` and its ``debug`` attribute as ``self.debug``.
        """
        self.gv = gv
        self.debug = self.gv.debug
        # mod_name is not defined in this class, so it must already be
        # resolvable on the instance when this line runs.
        Debuggable.__init__(self, '{0} Manipulator'.format(self.mod_name))

    @staticmethod
    def set_dom_tree(filename):
        """Parse ``filename`` and return the resulting lxml element tree.

        The parser drops ignorable whitespace-only text nodes and does not
        resolve entities.
        """
        parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
        return etree.parse(filename, parser)

    @staticmethod
    def set_dom_tree_with_parser(filename, p):
        """Parse ``filename`` with the caller-supplied parser ``p``."""
        return etree.parse(filename, p)

    @staticmethod
    def update_tmp_file(fr, to):
        """Copy the file ``fr`` to ``to`` using ``shutil.copy2``."""
        shutil.copy2(fr, to)

    @staticmethod
    def get_file_text(filename):
        """Return the full contents of ``filename`` opened in text mode."""
        # The context manager closes the handle even if read() raises.
        with open(filename) as handle:
            return handle.read()

    @staticmethod
    def xml_start(tag):
        """Return the opening tag string for ``tag``, e.g. ``<p>``."""
        return '<' + tag + '>'

    @staticmethod
    def xml_end(tag):
        """Return the closing tag string for ``tag``, e.g. ``</p>``."""
        return '</' + tag + '>'

    @staticmethod
    def append_safe(base, child, caller):
        """Append ``child`` to ``base`` unless ``child`` is an ancestor of it.

        :param base: element that should receive ``child``.
        :param child: element to append.
        :param caller: object used for the debug message when the append is
            refused (``caller.debug.print_debug`` is called); may be ``None``
            to suppress the message.
        :return: ``False`` when the append was refused because ``child`` is
            an ancestor of ``base``, otherwise ``True``.
        """
        try:
            ancestor = base.getparent()
            while ancestor is not None and ancestor is not child:
                ancestor = ancestor.getparent()
        except AttributeError:
            # base does not offer getparent(), so no ancestry check is
            # possible; keep the original best-effort and just append.
            ancestor = None

        if ancestor is not None:
            # The walk stopped on child itself: appending would create a cycle.
            if caller is not None:
                caller.debug.print_debug(caller, u'Aborting append: attempted to add a parent to its own child')
            return False

        base.append(child)
        return True

    def return_elements(self, xpath):
        """Evaluate ``xpath`` against the read-only DOM and return the result.

        The expression is evaluated with ``self.namespaces`` as the
        namespace mapping.
        """
        tree = self.load_dom_read()
        return tree.xpath(xpath, namespaces=self.namespaces)

    @staticmethod
    def search_and_replace_dom(tree, search_section, search_element, surround_with):
        """Wrap the content of matching elements in a new ``surround_with`` tag.

        For every element matched by ``.//search_section``, each descendant
        matched by ``.//search_element`` is emptied and given a single new
        child named ``surround_with``. That child carries an ``order``
        attribute (1-based position within the section) and a random
        ``uuid`` attribute. It holds a copy of the first child of the
        original element, or the original text if the element had no
        children.

        :return: the same ``tree`` object, modified in place.
        """
        for section in tree.xpath(".//" + search_section):
            matches = section.findall(".//" + search_element)
            for order, result in enumerate(matches, 1):
                new_elem = etree.Element(surround_with)
                new_elem.set("order", str(order))
                new_elem.set("uuid", str(uuid.uuid4()))

                # len() counts child elements; it is used instead of
                # truth-testing the element itself.
                if len(result) > 0:
                    # Copy before clear(), which removes the children.
                    elem_copy = deepcopy(result[0])
                    result.clear()
                    # The wrapper element itself is the parent here (the
                    # previous code passed its bound ``append`` method).
                    Manipulate.append_safe(new_elem, elem_copy, None)
                else:
                    new_elem.text = result.text
                    result.clear()

                Manipulate.append_safe(result, new_elem, None)
        return tree

    @staticmethod
    def write_output(f, text):
        """Write ``text`` to the path ``f``, replacing any existing content."""
        # The context manager closes the handle even if write() raises.
        with open(f, 'w') as out:
            out.write(text)

    @staticmethod
    def try_list_of_regex(file_string, *regex):
        """Return the matches of the first pattern that matches ``file_string``.

        :param file_string: text to search.
        :param regex: zero or more regular expressions, tried in order.
        :return: the non-empty ``re.findall`` result for the first pattern
            that matches, or ``None`` if no pattern matches or none is given.
        """
        for pattern in regex:
            # re.findall expects the pattern first and the searched text second.
            matches = re.findall(pattern, file_string)
            if matches:
                return matches
        return None

    @staticmethod
    def get_stripped_text(element):
        """Return the text of ``element`` and its descendants without markup.

        A single space is inserted before the text of every child element;
        the tail text of each child is appended unchanged.
        """
        text = element.text or ''
        for sub_element in element:
            text += ' ' + Manipulate.get_stripped_text(sub_element)
            if sub_element.tail is not None:
                text += sub_element.tail
        return text

    @staticmethod
    def replace_value_of_tag(text, new_value):
        """Set the ``teiHeader/fileDesc/titleStmt/title`` text in an XML string.

        :param text: XML document as a string, parsed with ``lxml.objectify``.
        :param new_value: new text for the title element.
        :return: the modified document serialised as a unicode string.
        """
        obj = objectify.fromstring(text)
        # noinspection PyProtectedMember
        obj.teiHeader.fileDesc.titleStmt.title._setText(new_value)
        return etree.tostring(obj.getroottree(), encoding="unicode")

    def load_dom_read(self):
        """Parse ``self.dom_to_load`` for read-only access and return the tree.

        The parser is created with ``recover=True``, so it tries to parse
        through broken XML instead of failing.
        """
        parser = etree.XMLParser(recover=True)
        return self.set_dom_tree_with_parser(self.dom_to_load, parser)

    def load_dom_tree(self):
        """Copy ``self.dom_to_load`` to ``self.dom_temp_file`` and parse the copy."""
        self.update_tmp_file(self.dom_to_load, self.dom_temp_file)
        return self.set_dom_tree(self.dom_temp_file)

    def replace(self, text, tag, *params):
        """Replace ``tag`` in ``text`` with the sequence of tags in ``params``.

        Every opening ``<tag>`` becomes the concatenated opening tags of
        ``params`` and every closing ``</tag>`` the concatenated closing tags.
        When no replacement tags are given a debug message is printed and
        ``text`` is returned unchanged.

        NOTE(review): closing tags are emitted in the same order as opening
        tags, so more than one replacement tag yields mis-nested markup
        (``<a><b>`` ... ``</a></b>``). Left as is because callers are not
        visible here -- confirm whether the closing order should be reversed.
        """
        if not params:
            self.debug.print_debug(self, "No parameters passed to replace function")
            return text

        replace_start = ''.join(self.xml_start(name) for name in params)
        replace_end = ''.join(self.xml_end(name) for name in params)
        text = text.replace(self.xml_start(tag), replace_start)
        return text.replace(self.xml_end(tag), replace_end)