aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-03-25 12:56:36 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-03-25 13:19:44 -0700
commitec15f162706da58c464b5c2b7b623920fcb96d7f (patch)
tree01981fe18cf79dcc2d38f3a2c94d469b48eb0249 /python
parent40fd4863c3c68261119aaa5b8f53d6b9dfc7a4cd (diff)
downloadfatcat-ec15f162706da58c464b5c2b7b623920fcb96d7f.tar.gz
fatcat-ec15f162706da58c464b5c2b7b623920fcb96d7f.zip
improve citeproc/CSL web interface
This tries to show the citeproc (bibtext, MLA, CSL-JSON) options for more releases, and not show the links when they would break. The primary motivation here is to work around two exceptions being thrown in prod every day (according to sentry): KeyError: 'role' ValueError: CLS requries some surname (family name) I'm guessing these are mostly coming from crawlers following the citeproc links on release landing pages.
Diffstat (limited to 'python')
-rw-r--r--python/fatcat_tools/transforms/csl.py18
-rw-r--r--python/fatcat_web/entity_helpers.py7
-rw-r--r--python/fatcat_web/templates/release_view.html3
-rw-r--r--python/tests/transform_csl.py20
-rw-r--r--python/tests/web_citation_csl.py46
5 files changed, 72 insertions, 22 deletions
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index 7ab94cac..832ad6aa 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -37,8 +37,9 @@ def release_to_csl(entity):
# Default to "local" (publication-specific) metadata; fall back to
# creator-level
family = contrib.surname or contrib.creator.surname or (contrib.raw_name and contrib.raw_name.split()[-1])
- if not contrib.raw_name:
- raise ValueError("CSL requires some surname (family name)")
+ if not family:
+ # CSL requires some surname (family name)
+ continue
c = dict(
family=family,
given=contrib.given_name or contrib.creator.given_name,
@@ -49,22 +50,27 @@ def release_to_csl(entity):
#static-ordering
literal=contrib.raw_name or contrib.creator.display_name,
#parse-names,
- role=contrib.role,
+ # role must be defined; default to author
+ role=contrib.role or 'author',
)
else:
family = contrib.surname or (contrib.raw_name and contrib.raw_name.split()[-1])
- if not contrib.raw_name:
- raise ValueError("CSL requires some surname (family name)")
+ if not family:
+ # CSL requires some surname (family name)
+ continue
c = dict(
family=family,
given=contrib.given_name,
literal=contrib.raw_name,
- role=contrib.role,
+ # role must be defined; default to author
+ role=contrib.role or 'author',
)
for k in list(c.keys()):
if not c[k]:
c.pop(k)
contribs.append(c)
+ if not contribs:
+ raise ValueError("citeproc requires at least one author with a surname")
abstract = None
if entity.abstracts:
abstract = entity.abstracts[0].content
diff --git a/python/fatcat_web/entity_helpers.py b/python/fatcat_web/entity_helpers.py
index 591dda80..009f6160 100644
--- a/python/fatcat_web/entity_helpers.py
+++ b/python/fatcat_web/entity_helpers.py
@@ -74,8 +74,13 @@ def enrich_release_entity(entity):
ref.extra['unstructured'] = strip_extlink_xml(ref.extra['unstructured'])
# author list to display; ensure it's sorted by index (any othors with
# index=None go to end of list)
- authors = [c for c in entity.contribs if c.role in ('author', None)]
+ authors = [c for c in entity.contribs if
+ c.role in ('author', None) and
+ (c.surname or c.raw_name or (c.creator and c.creator.surname))
+ ]
entity._authors = sorted(authors, key=lambda c: (c.index == None and 99999999) or c.index)
+ # need authors, title for citeproc to work
+ entity._can_citeproc = bool(entity._authors) and bool(entity.title)
if entity.abstracts:
# hack to show plain text instead of latex abstracts
if 'latex' in entity.abstracts[0].mimetype:
diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html
index 961b4759..d7c4e76e 100644
--- a/python/fatcat_web/templates/release_view.html
+++ b/python/fatcat_web/templates/release_view.html
@@ -388,8 +388,7 @@ accessible version.
<br>grouping other versions (eg, pre-print) and variants of this release
</div>
-{# this restriction, for CSL-JSON generation, rules out almost everything #}
-{% if release.contribs and release.contribs[0].creator_id %}
+{% if release._can_citeproc %}
<div class="ui segment attached accordion">
<div class="title" style="padding: 0px;">
<i class="dropdown icon"></i><b>Cite This Release</b>
diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py
index 6f29cba7..15c64ce5 100644
--- a/python/tests/transform_csl.py
+++ b/python/tests/transform_csl.py
@@ -12,22 +12,22 @@ def test_csl_crossref(crossref_importer):
# not a single line
raw = json.loads(f.read())
r = crossref_importer.parse_record(raw)
- # this work has some null contrib names; these should cause errors
- with pytest.raises(ValueError):
- release_to_csl(r)
- with pytest.raises(ValueError):
- csl = release_to_csl(r)
- citeproc_csl(csl, 'csl-json')
- # set with dummy so we can run other tests
- for c in r.contribs:
- if not c.raw_name:
- c.raw_name = "dummy"
csl = release_to_csl(r)
citeproc_csl(csl, 'csl-json')
citeproc_csl(csl, 'bibtex')
citeproc_csl(csl, 'harvard1')
citeproc_csl(csl, 'harvard1', html=True)
+ # check that with no author surnames, can't run
+ for c in r.contribs:
+ c.raw_name = None
+ c.surname = None
+ with pytest.raises(ValueError):
+ release_to_csl(r)
+ with pytest.raises(ValueError):
+ csl = release_to_csl(r)
+ citeproc_csl(csl, 'csl-json')
+
def test_csl_pubmed(crossref_importer):
with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f:
# multiple single lines
diff --git a/python/tests/web_citation_csl.py b/python/tests/web_citation_csl.py
index 3279ebea..e016b2d9 100644
--- a/python/tests/web_citation_csl.py
+++ b/python/tests/web_citation_csl.py
@@ -6,7 +6,7 @@ from fatcat_openapi_client.rest import ApiException
from fixtures import *
-def test_release_bibtex(app):
+def test_release_bibtex(app, api):
# "realistic" demo entity
rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam')
@@ -17,6 +17,8 @@ def test_release_bibtex(app):
assert b'@article{' in rv.data
rv = app.get('/release/ccccccccccccccccccccccccca.bib')
assert rv.status_code == 404
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=bibtex')
+ assert rv.status_code == 200
rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=csl-json')
assert rv.status_code == 200
# could also rv.get_json() here
@@ -25,10 +27,48 @@ def test_release_bibtex(app):
assert rv.status_code == 200
assert rv.data.decode('utf-8').startswith('Ioannidis, John. “Why Most Published Research Findings Are False”. 2.8 (2005)')
- # "dummy" demo entity
+ # "dummy" demo entity; very minimal metadata
rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai')
assert rv.status_code == 200
+ assert b'BibTeX' in rv.data
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai.bib')
+ assert rv.status_code == 200
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=modern-language-association')
+ assert rv.status_code == 200
+ rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=csl-json')
+ assert rv.status_code == 200
+
+ # create release which can not have citeproc run on it (no authors)
+ eg = quick_eg(api)
+ r1 = ReleaseEntity(
+ title="some title",
+ ext_ids=ReleaseExtIds(),
+ )
+ r1edit = api.create_release(eg.editgroup_id, r1)
+ api.accept_editgroup(eg.editgroup_id)
+
+ rv = app.get('/release/{}'.format(r1edit.ident))
+ assert rv.status_code == 200
assert not b'BibTeX' in rv.data
with pytest.raises(ValueError):
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai.bib')
+ rv = app.get('/release/{}.bib'.format(r1edit.ident))
+
+ # create release can have citeproc run on it (no authors)
+ eg = quick_eg(api)
+ r2 = ReleaseEntity(
+ title="some title again",
+ contribs=[
+ ReleaseContrib(
+ given_name="Paul",
+ surname="Otlet"),
+ ],
+ ext_ids=ReleaseExtIds(),
+ )
+ r2edit = api.create_release(eg.editgroup_id, r2)
+ api.accept_editgroup(eg.editgroup_id)
+ rv = app.get('/release/{}'.format(r2edit.ident))
+ assert rv.status_code == 200
+ assert b'BibTeX' in rv.data
+ rv = app.get('/release/{}.bib'.format(r2edit.ident))
+ assert rv.status_code == 200