From 704ea367439f6faf88343b5ee50a438900c96aca Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 18 Mar 2019 14:48:06 -0700 Subject: refactor and test citeproc code --- python/fatcat_tools/transforms/__init__.py | 2 +- python/fatcat_tools/transforms/csl.py | 56 ++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 3 deletions(-) (limited to 'python/fatcat_tools/transforms') diff --git a/python/fatcat_tools/transforms/__init__.py b/python/fatcat_tools/transforms/__init__.py index 4950433b..863472c0 100644 --- a/python/fatcat_tools/transforms/__init__.py +++ b/python/fatcat_tools/transforms/__init__.py @@ -1,4 +1,4 @@ from .entities import entity_to_dict, entity_from_json from .elasticsearch import release_to_elasticsearch, container_to_elasticsearch, changelog_to_elasticsearch -from .csl import release_to_csl +from .csl import release_to_csl, citeproc_csl diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index c2e90539..3509f350 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -1,6 +1,13 @@ - +import json import collections + +from citeproc import CitationStylesStyle, CitationStylesBibliography +from citeproc import Citation, CitationItem +from citeproc import formatter +from citeproc.source.json import CiteProcJSON +from citeproc_styles import get_style_filepath + from fatcat_client import ApiClient @@ -8,7 +15,7 @@ def contribs_by_role(contribs, role): ret = [c.copy() for c in contribs if c['role'] == role] [c.pop('role') for c in ret] # XXX: - [c.pop('literal') for c in ret] + [c.pop('literal') for c in ret if 'literal' in c] if not ret: return None else: @@ -29,6 +36,11 @@ def release_to_csl(entity): if contrib.creator: # TODO: should we actually be pulling creator metadata? or just # using release-local raw metadata? 
def citeproc_csl(csl_json: dict, style: str, html: bool = False) -> str:
    """
    Render a single CSL-JSON record as a styled citation string.

    Args:
        csl_json: dict holding one CSL-JSON item (for example the output of
            release_to_csl()). The caller's dict is never modified: when it
            has no truthy 'id', a shallow copy with id "unknown" is used.
        style: name of the citation style. Two values are special-cased:
            - 'csl-json': return the record itself, JSON-encoded
            - 'bibtex': the rendered fragments are re-assembled into a
              multi-line BibTeX entry (used with LaTeX)
            Every value other than 'csl-json' is resolved to a style file
            with get_style_filepath().
        html: select citeproc's HTML formatter instead of the plain-text
            one. Ignored for 'csl-json', which returns before any
            formatter is chosen.

    Returns:
        The citation as a string.

    NOTE(review): the formatter is picked from `html` for 'bibtex' as well,
    so callers that want plain BibTeX should leave `html` false.
    """
    # citeproc looks the item up by id, so make sure one exists. Build a new
    # dict rather than writing the placeholder into the caller's object.
    if not csl_json.get('id'):
        csl_json = dict(csl_json, id="unknown")

    if style == "csl-json":
        return json.dumps(csl_json)

    bib_src = CiteProcJSON([csl_json])
    form = formatter.html if html else formatter.plain
    style_path = get_style_filepath(style)
    bib_style = CitationStylesStyle(style_path, validate=False)
    bib = CitationStylesBibliography(bib_style, bib_src, form)
    bib.register(Citation([CitationItem(csl_json['id'])]))
    # Only one item was registered, so only the first entry is of interest.
    fragments = bib.bibliography()[0]

    if style != "bibtex":
        return ''.join(fragments)

    # Re-flow the rendered fragments into one BibTeX field per line.
    # NOTE(review): the whitespace inside these literals is taken from a
    # whitespace-flattened patch; confirm it against the upstream file.
    parts = ["\n"]
    for fragment in fragments:
        if fragment.startswith(" @"):
            # Entry header: only the "@" is emitted for this fragment.
            parts.append("@")
        elif fragment.startswith(" "):
            # Indented fragment: start a new line in the output.
            parts.append("\n " + fragment)
        else:
            parts.append(fragment)
    return ''.join(parts)