aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-03-18 14:48:06 -0700
committerBryan Newbold <bnewbold@robocracy.org>2019-03-18 14:48:06 -0700
commit704ea367439f6faf88343b5ee50a438900c96aca (patch)
tree40463ae5d540d768eec77c4522589d6b95edf3f0
parentf05b1c823be23b0bc3885199aaca51137e6a22d3 (diff)
downloadfatcat-704ea367439f6faf88343b5ee50a438900c96aca.tar.gz
fatcat-704ea367439f6faf88343b5ee50a438900c96aca.zip
refactor and test citeproc code
-rw-r--r--python/fatcat_tools/transforms/__init__.py2
-rw-r--r--python/fatcat_tools/transforms/csl.py56
-rwxr-xr-xpython/fatcat_transform.py32
-rw-r--r--python/tests/transform_csl.py62
4 files changed, 120 insertions, 32 deletions
diff --git a/python/fatcat_tools/transforms/__init__.py b/python/fatcat_tools/transforms/__init__.py
index 4950433b..863472c0 100644
--- a/python/fatcat_tools/transforms/__init__.py
+++ b/python/fatcat_tools/transforms/__init__.py
@@ -1,4 +1,4 @@
from .entities import entity_to_dict, entity_from_json
from .elasticsearch import release_to_elasticsearch, container_to_elasticsearch, changelog_to_elasticsearch
-from .csl import release_to_csl
+from .csl import release_to_csl, citeproc_csl
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index c2e90539..3509f350 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -1,6 +1,13 @@
-
+import json
import collections
+
+from citeproc import CitationStylesStyle, CitationStylesBibliography
+from citeproc import Citation, CitationItem
+from citeproc import formatter
+from citeproc.source.json import CiteProcJSON
+from citeproc_styles import get_style_filepath
+
from fatcat_client import ApiClient
@@ -8,7 +15,7 @@ def contribs_by_role(contribs, role):
ret = [c.copy() for c in contribs if c['role'] == role]
[c.pop('role') for c in ret]
# XXX:
- [c.pop('literal') for c in ret]
+ [c.pop('literal') for c in ret if 'literal' in c]
if not ret:
return None
else:
@@ -29,6 +36,11 @@ def release_to_csl(entity):
if contrib.creator:
# TODO: should we actually be pulling creator metadata? or just
# using release-local raw metadata?
+ family = contrib.creator.surname
+ if not family:
+ if not contrib.raw_name:
+ raise ValueError("CSL requires some surname (family name)")
+ family = contrib.raw_name.split()[-1]
c = dict(
family=contrib.creator.surname,
given=contrib.creator.given_name,
@@ -42,6 +54,8 @@ def release_to_csl(entity):
role=contrib.role,
)
else:
+ if not contrib.raw_name:
+ raise ValueError("CSL requires some surname (family name)")
c = dict(
# XXX: possible inclusion of full name metadata in release_contrib
family=contrib.raw_name.split()[-1],
@@ -168,3 +182,41 @@ def refs_to_csl(entity):
ret.append(csl)
return ret
+def citeproc_csl(csl_json, style, html=False):
+ """
+ Renders a CSL-JSON object (as produced by release_to_csl()) to a styled citation.
+
+ Notable styles include:
+ - 'csl-json': special case to JSON encode the structured CSL object (via
+ release_to_csl())
+ - 'bibtex': multi-line BibTeX format (used with LaTeX)
+
+ Returns a string; if the html flag is set, and the style isn't 'csl-json'
+ or 'bibtex', it will be HTML. Otherwise plain text.
+ """
+ if not csl_json.get('id'):
+ csl_json['id'] = "unknown"
+ if style == "csl-json":
+ return json.dumps(csl_json)
+ bib_src = CiteProcJSON([csl_json])
+ form = formatter.plain
+ if html:
+ form = formatter.html
+ style_path = get_style_filepath(style)
+ bib_style = CitationStylesStyle(style_path, validate=False)
+ bib = CitationStylesBibliography(bib_style, bib_src, form)
+ bib.register(Citation([CitationItem(csl_json['id'])]))
+ lines = bib.bibliography()[0]
+ if style == "bibtex":
+ out = "\n"
+ for l in lines:
+ if l.startswith(" @"):
+ out += "@"
+ elif l.startswith(" "):
+ out += "\n " + l
+ else:
+ out += l
+ return ''.join(out)
+ else:
+ return ''.join(lines)
+
diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py
index b11ac461..209edfd7 100755
--- a/python/fatcat_transform.py
+++ b/python/fatcat_transform.py
@@ -18,7 +18,7 @@ from fatcat_client.rest import ApiException
from fatcat_client import ReleaseEntity, ContainerEntity, ChangelogEntry
from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \
release_to_elasticsearch, container_to_elasticsearch, \
- changelog_to_elasticsearch, public_api, release_to_csl
+ changelog_to_elasticsearch, public_api, release_to_csl, citeproc_csl
def run_elasticsearch_releases(args):
@@ -55,35 +55,9 @@ def run_citeproc_releases(args):
continue
entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client)
csl_json = release_to_csl(entity)
- # XXX:
csl_json['id'] = "release:" + (entity.ident or "unknown")
- if args.style == "csl-json":
- args.json_output.write(json.dumps(csl_json) + "\n")
- continue
- bib_src = CiteProcJSON([csl_json])
- form = formatter.plain
- if args.html:
- form = formatter.html
- style_path = get_style_filepath(args.style)
- bib_style = CitationStylesStyle(style_path, validate=False)
- bib = CitationStylesBibliography(bib_style, bib_src, form)
- bib.register(Citation([CitationItem(csl_json['id'])]))
- # XXX:
- #args.json_output.write(
- # json.dumps(release_to_csl(entity)) + '\n')
- lines = bib.bibliography()[0]
- if args.style == "bibtex":
- for l in lines:
- if l.startswith(" @"):
- args.json_output.write("\n@")
- elif l.startswith(" "):
- #print("line: START|{}|END".format(l))
- args.json_output.write("\n " + l)
- else:
- args.json_output.write(l)
- else:
- args.json_output.write(''.join(lines) + "\n")
- print()
+ out = citeproc_csl(csl_json, args.style, args.html)
+ args.json_output.write(out + "\n")
def main():
parser = argparse.ArgumentParser()
diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py
new file mode 100644
index 00000000..43f7a99d
--- /dev/null
+++ b/python/tests/transform_csl.py
@@ -0,0 +1,62 @@
+
+import json
+import pytest
+from fatcat_tools import *
+from fatcat_client import *
+
+from fixtures import api
+from import_crossref import crossref_importer
+
+def test_csl_crossref(crossref_importer):
+ with open('tests/files/crossref-works.single.json', 'r') as f:
+ # the whole file is one JSON document (not one record per line)
+ raw = json.loads(f.read())
+ r = crossref_importer.parse_record(raw)
+ # this work has some null contrib names; these should cause errors
+ with pytest.raises(ValueError):
+ release_to_csl(r)
+ with pytest.raises(ValueError):
+ csl = release_to_csl(r)
+ citeproc_csl(csl, 'csl-json')
+ # set with dummy so we can run other tests
+ for c in r.contribs:
+ if not c.raw_name:
+ c.raw_name = "dummy"
+ csl = release_to_csl(r)
+ citeproc_csl(csl, 'csl-json')
+ citeproc_csl(csl, 'bibtex')
+ citeproc_csl(csl, 'harvard1')
+ citeproc_csl(csl, 'harvard1', html=True)
+
+def test_csl_pubmed(crossref_importer):
+ with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f:
+ # one JSON-encoded release per line
+ for line in f:
+ r = entity_from_json(line, ReleaseEntity)
+ csl = release_to_csl(r)
+ citeproc_csl(csl, 'csl-json')
+ citeproc_csl(csl, 'bibtex')
+ citeproc_csl(csl, 'harvard1')
+ citeproc_csl(csl, 'harvard1', html=True)
+
+def test_csl_pubmed_bibtex(crossref_importer):
+ with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f:
+ r = entity_from_json(f.readline(), ReleaseEntity)
+ csl = release_to_csl(r)
+ print(citeproc_csl(csl, 'bibtex'))
+ # XXX: what's with the '`' in volume?
+ assert citeproc_csl(csl, 'bibtex').strip() == """
+@article{mędrela-kuder_szymura_2018,
+ title={Selected anti-health behaviours among women with osteoporosis},
+ volume={69`},
+ ISSN={0035-7715},
+ DOI={10.32394/rpzh.2018.0046},
+ abstractNote={In the prevention of osteoporosis and its treatment, it is important to prevent bone loss by reducing the occurrence of factors determining human health, which reduce the risk of osteoporosis, such as health behaviors.},
+ number={4},
+ journal={Roczniki Panstwowego Zakladu Higieny},
+ author={Mędrela-Kuder and Szymura},
+ year={2018}}
+ """.strip()
+ assert citeproc_csl(csl, 'harvard1', html=True).strip() == """
+ Mędrela-Kuder &amp; Szymura, 2018. Selected anti-health behaviours among women with osteoporosis. <i>Roczniki Panstwowego Zakladu Higieny</i>, 69`(4).
+ """.strip()