#!/usr/bin/env python
# coding=utf-8
"""bibliographydatabase: a tool to match plaintext values inside NLM ref tags against a known database
Usage:
bibliographydatabase.py <input> [options]
bibliographydatabase.py zotero <query> [options]
Options:
-d, --debug Enable debug output.
-h, --help Show this screen.
-v, --version Show version.
-z, --zotero Enable Zotero integration for references.
"""
[docs]__author__ = "Martin Paul Eve"
[docs]__email__ = "martin@martineve.com"
"""
A class that scans for, stores and retrieves NLM citations
"""
from debug import Debuggable
import tempfile
from nlmmanipulate import NlmManipulate
import os
import re
from lxml import etree
from docopt import docopt
from bare_globals import GV
from interactive import Interactive
[docs]class Person():
def __init__(self, firstname='', lastname=''):
self.firstname = firstname
self.lastname = lastname
[docs] def get_citation(self):
return u'<name>' \
u'<surname>{0}</surname>' \
u'<given-names>{1}</given-names>' \
u'</name>'.format(self.lastname, self.firstname)
[docs]class Website():
def __init__(self, authors=None, title='', website_title='', year='', url=''):
if authors is None:
self.authors = []
else:
self.authors = authors
self.website_title = website_title
self.title = title
self.book_title = website_title
self.year = year
self.url = url
@staticmethod
[docs] def object_type():
return "website"
[docs] def get_citation(self):
author_block = ''
for author in self.authors:
author_block += author.get_citation()
ret = u'<ref>'
ret += u'<element-citation publication-type="journal">'
if author_block != '':
ret += u'<person-group person-group-type="author">{0}</person-group>'.format(author_block)
if self.title != '':
ret += u'<article-title>{0}</article-title>'.format(self.title)
if self.book_title != '':
ret += u'<source>{0}</source>'.format(self.website_title)
ret += u'<date><year>{0}</year></date>'.format(self.year)
if self.url != '' and self.url is not None:
ret += u'<uri>{0}</uri>'.format(self.url)
ret += u'</element-citation>'
ret += u'</ref>'
ret = ret.replace('&', '&')
ret = ret.replace('<i>', '<italic>')
ret = ret.replace('</i>', '</italic>')
return ret
[docs]class BookChapter():
def __init__(self, authors=None, title='', book_title='', publisher='', place='', year='', fpage='', lpage='',
editors=None, translators=None, doi=''):
if authors is None:
self.authors = []
else:
self.authors = authors
if editors is None:
self.editors = []
else:
self.editors = editors
if translators is None:
self.translators = []
else:
self.translators = translators
self.title = title
self.book_title = book_title
self.year = year
self.publisher = publisher
self.place = place
self.fpage = fpage
self.lpage = lpage
self.doi = doi
@staticmethod
[docs] def object_type():
return "book chapter"
[docs] def get_citation(self):
author_block = ''
for author in self.authors:
author_block += author.get_citation()
translator_block = ''
for translator in self.translators:
translator_block += translator.get_citation()
editor_block = ''
for editor in self.editors:
editor_block += editor.get_citation()
ret = u'<ref>'
ret += u'<element-citation publication-type="bookchapter">'
if author_block != '':
ret += u'<person-group person-group-type="author">{0}</person-group>'.format(author_block)
if self.title != '':
ret += u'<article-title>{0}</article-title>'.format(self.title)
if self.book_title != '':
ret += u'<source>{0}</source>'.format(self.book_title)
ret += u'<date><year>{0}</year></date>'.format(self.year)
if editor_block != '':
ret += u'<person-group person-group-type="editor">{0}</person-group>'.format(editor_block)
if translator_block != '':
ret += u'<person-group person-group-type="translator">{0}</person-group>'.format(translator_block)
ret += u'<publisher-loc>{0}</publisher-loc>'.format(self.place)
ret += u'<publisher-name>{0}</publisher-name>'.format(self.publisher)
if self.fpage != '' and self.fpage is not None:
ret += u'<fpage>{0}</fpage>'.format(self.fpage)
if self.lpage != '' and self.lpage is not None:
ret += u'<lpage>{0}</lpage>'.format(self.lpage)
if self.doi != '' and self.doi is not None:
ret += u'<pub-id pub-id-type="doi">{0}</pub-id>'.format(self.doi)
ret += u'</element-citation>'
ret += u'</ref>'
ret = ret.replace('&', '&')
ret = ret.replace('<i>', '<italic>')
ret = ret.replace('</i>', '</italic>')
return ret
[docs]class Book():
def __init__(self, authors=None, title='', publisher='', place='', year='', editors=None, doi='', translators=None):
if authors is None:
self.authors = []
else:
self.authors = authors
if editors is None:
self.editors = []
else:
self.editors = editors
if translators is None:
self.translators = []
else:
self.translators = translators
self.title = title
self.year = year
self.publisher = publisher
self.place = place
self.doi = doi
@staticmethod
[docs] def object_type():
return "book"
[docs] def get_citation(self):
author_block = ''
for author in self.authors:
author_block += author.get_citation()
translator_block = ''
for translator in self.translators:
translator_block += translator.get_citation()
editor_block = ''
for editor in self.editors:
editor_block += editor.get_citation()
ret = u'<ref>'
ret += u'<element-citation publication-type="book">'
if author_block != '':
ret += u'<person-group person-group-type="author">{0}</person-group>'.format(author_block)
if self.title != '':
ret += u'<source>{0}</source>'.format(self.title)
ret += u'<date><year>{0}</year></date>'.format(self.year)
if editor_block != '':
ret += u'<person-group person-group-type="editor">{0}</person-group>'.format(editor_block)
if translator_block != '':
ret += u'<person-group person-group-type="translator">{0}</person-group>'.format(translator_block)
ret += u'<publisher-loc>{0}</publisher-loc>'.format(self.place)
ret += u'<publisher-name>{0}</publisher-name>'.format(self.publisher)
if self.doi != '' and self.doi is not None:
ret += u'<pub-id pub-id-type="doi">{0}</pub-id>'.format(self.doi)
ret += u'</element-citation>'
ret += u'</ref>'
ret = ret.replace('&', '&')
ret = ret.replace('<i>', '<italic>')
ret = ret.replace('</i>', '</italic>')
return ret
[docs]class JournalArticle():
def __init__(self, authors=None, title='', journal='', issue='', volume='', fpage='', lpage='', year='', doi='',
translators=None):
if authors is None:
self.authors = []
else:
self.authors = authors
if translators is None:
self.translators = []
else:
self.translators = translators
self.title = title
self.journal = journal
self.issue = issue
self.volume = volume
self.fpage = fpage
self.lpage = lpage
self.year = year
self.doi = doi
@staticmethod
[docs] def object_type():
return "journal article"
[docs] def get_citation(self):
author_block = ''
for author in self.authors:
author_block += author.get_citation()
translator_block = ''
for translator in self.translators:
translator_block += translator.get_citation()
ret = u'<ref>'
ret += u'<element-citation publication-type="journal">'
if author_block != '':
ret += u'<person-group person-group-type="author">{0}</person-group>'.format(author_block)
if self.title != '':
ret += u'<article-title>{0}</article-title>'.format(self.title)
if self.journal != '':
ret += u'<source>{0}</source>'.format(self.journal)
if translator_block != '':
ret += u'<person-group person-group-type="translator">{0}</person-group>'.format(translator_block)
ret += u'<date><year>{0}</year></date>'.format(self.year)
if self.volume != '' and self.volume is not None:
ret += u'<volume>{0}</volume>'.format(self.volume)
if self.issue != '' and self.volume is not None:
ret += u'<issue>{0}</issue>'.format(self.issue)
if self.fpage != '' and self.fpage is not None:
ret += u'<fpage>{0}</fpage>'.format(self.fpage)
if self.lpage != '' and self.lpage is not None:
ret += u'<lpage>{0}</lpage>'.format(self.lpage)
if self.doi != '' and self.doi is not None:
ret += u'<pub-id pub-id-type="doi">{0}</pub-id>'.format(self.doi)
ret += u'</element-citation>'
ret += u'</ref>'
ret = ret.replace('&', '&')
ret = ret.replace('<i>', '<italic>')
ret = ret.replace('</i>', '</italic>')
return ret
[docs]class BibliographyDatabase(Debuggable):
def __init__(self, global_variables):
Debuggable.__init__(self, 'Bibliography Database')
self.gv = global_variables
self.debug = self.gv.debug
self.size_cutoff = 16
self.aggression = 8
[docs] def process_zotero(self):
from zotero import libzotero
zotero = libzotero.LibZotero(self.gv.settings.get_setting(u'zotero', self), self.gv)
manipulate = NlmManipulate(self.gv)
master_tree = manipulate.load_dom_tree()
tree = master_tree.xpath('//back/ref-list/ref')
for element in tree:
original_term = manipulate.get_stripped_text(element)
term = original_term
#term = re.sub(r'(.+?)(\(.+?\))(.*)', r'\1\3', term)
term = re.sub(r'(?<![0-9])[1-9][0-9]{0,2}(?![0-9])', r'', term)
term = re.sub(r'[\-,\.\<\>\(\)\;\:\@\'\#\~\}\{\[\]\"\!\\/]', '', term)
term = re.sub(u'[^\s]+?\s[Ee]dition', u' ', term)
term = re.sub(u'\s.\s', u' ', term)
term = re.sub(u'(?<=[A-Z])\.', u' ', term)
term = term.replace(u'“', u'')
term = term.replace(u'\'s', u'')
term = term.replace(u'’s', u'')
term = term.replace(u'’', u'')
term = term.replace(u' Ed. ', u' ')
term = term.replace(u' Ed ', u' ')
term = term.replace(u' Trans. ', u' ')
term = term.replace(u' Trans ', u' ')
term = term.replace(u' trans ', u' ')
term = term.replace(u' trans. ', u' ')
term = term.replace(u' by. ', u' ')
term = term.replace(u' by ', u' ')
term = term.replace(u' ed. ', u' ')
term = term.replace(u' ed ', u' ')
term = term.replace(u' In ', u' ')
term = term.replace(u' in ', u' ')
term = term.replace(u' print ', u' ')
term = term.replace(u' Print ', u' ')
term = term.replace(u' and ', u' ')
term = term.replace(u'”', u'')
term = re.sub(r'[Aa]ccessed', '', term)
term = re.sub(r'meTypesetbr', '', term)
term = re.sub(r'\s+', ' ', term)
results = zotero.search(term.strip())
while len(results) == 0 and len(term.strip().split(' ')) > 2:
# no results found.
# begin iterating backwards
term = ' '.join(term.strip().split(' ')[:-1])
results = zotero.search(term.strip())
if len(results) == 1:
res = results[0].JATS_format()
if res is not None:
ref = etree.fromstring(res)
if 'id' in element.attrib:
ref.attrib['id'] = element.attrib['id']
element.addnext(ref)
original_term = re.sub(u'--', u'', original_term)
comment = etree.Comment(original_term)
ref.addnext(comment)
element.tag = 'REMOVE'
etree.strip_elements(master_tree, 'REMOVE')
manipulate.save_tree(master_tree)
[docs] def run(self):
if self.gv.use_zotero:
self.process_zotero()
[docs]def main():
args = docopt(__doc__, version='meTypeset 0.1')
bare_gv = GV(args)
if args['--debug']:
bare_gv.debug.enable_debug(True)
bare_gv.debug.enable_prompt(Interactive(bare_gv))
if args['zotero']:
from zotero import libzotero
zotero = libzotero.LibZotero(bare_gv.settings.get_setting(u'zotero', bare_gv), bare_gv)
term = args['<query>']
term = re.sub(r'[\-,\.\<\>\(\)\;\:\@\'\#\~\}\{\[\]\"\!\\/]', ' ', term)
term = term.replace(u'“', u'')
term = re.sub(u'[^\s]+?\s[Ee]dition', u' ', term)
term = term.replace(u'\'s', u'')
term = term.replace(u'’s', u'')
term = term.replace(u'’', u'')
term = term.replace(u'”', u'')
term = re.sub(r'[Aa]ccessed', '', term)
term = re.sub(r'meTypesetbr', '', term)
term = re.sub(r'\s+', ' ', term)
results = zotero.search(term.strip())
bare_gv.debug.print_debug(bare_gv, "%d results for %s" % (len(results), term))
for item in results:
interactive = Interactive(bare_gv)
interactive.print_(item.JATS_format())
return
bibliography_database_instance = BibliographyDatabase(bare_gv)
bibliography_database_instance.run()
if __name__ == '__main__':
main()