diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-03-25 12:56:36 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-03-25 13:19:44 -0700 |
commit | ec15f162706da58c464b5c2b7b623920fcb96d7f (patch) | |
tree | 01981fe18cf79dcc2d38f3a2c94d469b48eb0249 | |
parent | 40fd4863c3c68261119aaa5b8f53d6b9dfc7a4cd (diff) | |
download | fatcat-ec15f162706da58c464b5c2b7b623920fcb96d7f.tar.gz fatcat-ec15f162706da58c464b5c2b7b623920fcb96d7f.zip |
improve citeproc/CSL web interface
This tries to show the citeproc (BibTeX, MLA, CSL-JSON) options for
more releases, and not show the links when they would break.
The primary motivation here is to work around two exceptions being
thrown in prod every day (according to sentry):
KeyError: 'role'
ValueError: CSL requires some surname (family name)
I'm guessing these are mostly coming from crawlers following the
citeproc links on release landing pages.
-rw-r--r-- | python/fatcat_tools/transforms/csl.py | 18 | ||||
-rw-r--r-- | python/fatcat_web/entity_helpers.py | 7 | ||||
-rw-r--r-- | python/fatcat_web/templates/release_view.html | 3 | ||||
-rw-r--r-- | python/tests/transform_csl.py | 20 | ||||
-rw-r--r-- | python/tests/web_citation_csl.py | 46 |
5 files changed, 72 insertions, 22 deletions
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index 7ab94cac..832ad6aa 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -37,8 +37,9 @@ def release_to_csl(entity): # Default to "local" (publication-specific) metadata; fall back to # creator-level family = contrib.surname or contrib.creator.surname or (contrib.raw_name and contrib.raw_name.split()[-1]) - if not contrib.raw_name: - raise ValueError("CSL requires some surname (family name)") + if not family: + # CSL requires some surname (family name) + continue c = dict( family=family, given=contrib.given_name or contrib.creator.given_name, @@ -49,22 +50,27 @@ def release_to_csl(entity): #static-ordering literal=contrib.raw_name or contrib.creator.display_name, #parse-names, - role=contrib.role, + # role must be defined; default to author + role=contrib.role or 'author', ) else: family = contrib.surname or (contrib.raw_name and contrib.raw_name.split()[-1]) - if not contrib.raw_name: - raise ValueError("CSL requires some surname (family name)") + if not family: + # CSL requires some surname (family name) + continue c = dict( family=family, given=contrib.given_name, literal=contrib.raw_name, - role=contrib.role, + # role must be defined; default to author + role=contrib.role or 'author', ) for k in list(c.keys()): if not c[k]: c.pop(k) contribs.append(c) + if not contribs: + raise ValueError("citeproc requires at least one author with a surname") abstract = None if entity.abstracts: abstract = entity.abstracts[0].content diff --git a/python/fatcat_web/entity_helpers.py b/python/fatcat_web/entity_helpers.py index 591dda80..009f6160 100644 --- a/python/fatcat_web/entity_helpers.py +++ b/python/fatcat_web/entity_helpers.py @@ -74,8 +74,13 @@ def enrich_release_entity(entity): ref.extra['unstructured'] = strip_extlink_xml(ref.extra['unstructured']) # author list to display; ensure it's sorted by index (any othors with # index=None 
go to end of list) - authors = [c for c in entity.contribs if c.role in ('author', None)] + authors = [c for c in entity.contribs if + c.role in ('author', None) and + (c.surname or c.raw_name or (c.creator and c.creator.surname)) + ] entity._authors = sorted(authors, key=lambda c: (c.index == None and 99999999) or c.index) + # need authors, title for citeproc to work + entity._can_citeproc = bool(entity._authors) and bool(entity.title) if entity.abstracts: # hack to show plain text instead of latex abstracts if 'latex' in entity.abstracts[0].mimetype: diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index 961b4759..d7c4e76e 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -388,8 +388,7 @@ accessible version. <br>grouping other versions (eg, pre-print) and variants of this release </div> -{# this restriction, for CSL-JSON generation, rules out almost everything #} -{% if release.contribs and release.contribs[0].creator_id %} +{% if release._can_citeproc %} <div class="ui segment attached accordion"> <div class="title" style="padding: 0px;"> <i class="dropdown icon"></i><b>Cite This Release</b> diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py index 6f29cba7..15c64ce5 100644 --- a/python/tests/transform_csl.py +++ b/python/tests/transform_csl.py @@ -12,22 +12,22 @@ def test_csl_crossref(crossref_importer): # not a single line raw = json.loads(f.read()) r = crossref_importer.parse_record(raw) - # this work has some null contrib names; these should cause errors - with pytest.raises(ValueError): - release_to_csl(r) - with pytest.raises(ValueError): - csl = release_to_csl(r) - citeproc_csl(csl, 'csl-json') - # set with dummy so we can run other tests - for c in r.contribs: - if not c.raw_name: - c.raw_name = "dummy" csl = release_to_csl(r) citeproc_csl(csl, 'csl-json') citeproc_csl(csl, 'bibtex') citeproc_csl(csl, 
'harvard1') citeproc_csl(csl, 'harvard1', html=True) + # check that with no author surnames, can't run + for c in r.contribs: + c.raw_name = None + c.surname = None + with pytest.raises(ValueError): + release_to_csl(r) + with pytest.raises(ValueError): + csl = release_to_csl(r) + citeproc_csl(csl, 'csl-json') + def test_csl_pubmed(crossref_importer): with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f: # multiple single lines diff --git a/python/tests/web_citation_csl.py b/python/tests/web_citation_csl.py index 3279ebea..e016b2d9 100644 --- a/python/tests/web_citation_csl.py +++ b/python/tests/web_citation_csl.py @@ -6,7 +6,7 @@ from fatcat_openapi_client.rest import ApiException from fixtures import * -def test_release_bibtex(app): +def test_release_bibtex(app, api): # "realistic" demo entity rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam') @@ -17,6 +17,8 @@ def test_release_bibtex(app): assert b'@article{' in rv.data rv = app.get('/release/ccccccccccccccccccccccccca.bib') assert rv.status_code == 404 + rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=bibtex') + assert rv.status_code == 200 rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=csl-json') assert rv.status_code == 200 # could also rv.get_json() here @@ -25,10 +27,48 @@ def test_release_bibtex(app): assert rv.status_code == 200 assert rv.data.decode('utf-8').startswith('Ioannidis, John. “Why Most Published Research Findings Are False”. 
2.8 (2005)') - # "dummy" demo entity + # "dummy" demo entity; very minimal metadata rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai') assert rv.status_code == 200 + assert b'BibTeX' in rv.data + rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai.bib') + assert rv.status_code == 200 + rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=modern-language-association') + assert rv.status_code == 200 + rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=csl-json') + assert rv.status_code == 200 + + # create release which can not have citeproc run on it (no authors) + eg = quick_eg(api) + r1 = ReleaseEntity( + title="some title", + ext_ids=ReleaseExtIds(), + ) + r1edit = api.create_release(eg.editgroup_id, r1) + api.accept_editgroup(eg.editgroup_id) + + rv = app.get('/release/{}'.format(r1edit.ident)) + assert rv.status_code == 200 assert not b'BibTeX' in rv.data with pytest.raises(ValueError): - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai.bib') + rv = app.get('/release/{}.bib'.format(r1edit.ident)) + + # create release can have citeproc run on it (no authors) + eg = quick_eg(api) + r2 = ReleaseEntity( + title="some title again", + contribs=[ + ReleaseContrib( + given_name="Paul", + surname="Otlet"), + ], + ext_ids=ReleaseExtIds(), + ) + r2edit = api.create_release(eg.editgroup_id, r2) + api.accept_editgroup(eg.editgroup_id) + rv = app.get('/release/{}'.format(r2edit.ident)) + assert rv.status_code == 200 + assert b'BibTeX' in rv.data + rv = app.get('/release/{}.bib'.format(r2edit.ident)) + assert rv.status_code == 200 |