From 955d4942d5111cbbd8f4b99d4e3c6d79262dab6e Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 13 May 2019 14:23:02 -0700 Subject: partial python impl of ext_id and release_stage refactors --- python/fatcat_tools/importers/cdl_dash_dat.py | 8 ++-- python/fatcat_tools/importers/crossref.py | 26 ++++++----- python/fatcat_tools/importers/grobid_metadata.py | 1 + python/fatcat_tools/transforms/csl.py | 8 ++-- python/fatcat_tools/transforms/elasticsearch.py | 21 ++++----- python/fatcat_web/forms.py | 24 +++++++--- python/fatcat_web/routes.py | 2 +- python/fatcat_web/templates/entity_macros.html | 6 +-- python/fatcat_web/templates/release_edit.html | 4 +- python/fatcat_web/templates/release_view.html | 56 ++++++++++++------------ 10 files changed, 86 insertions(+), 70 deletions(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 1a21194d..e1252b6d 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -96,7 +96,9 @@ def cdl_dash_release(meta, extra=None): )) r = ReleaseEntity( - doi=doi, + ext_ids=ReleaseEntityExtIds( + doi=doi, + ), title=clean(meta['title'], force_xml=True), publisher=clean(meta['publisher']), release_year=int(meta['publicationYear']), @@ -170,9 +172,9 @@ def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None): agent="fatcat_tools.auto_cdl_dash_dat"))) editgroup_id = eg.editgroup_id - if not release_id and release.doi: + if not release_id and release.ext_ids.doi: try: - r = api.lookup_release(doi=release.doi) + r = api.lookup_release(doi=release.ext_ids.doi) release_id = r.ident except fatcat_client.rest.ApiException: pass diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 722f93aa..999ce13f 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -355,10 +355,10 @@ class CrossrefImporter(EntityImporter): # release status if obj['type'] in ('journal-article', 'conference-proceeding', 'book', 'dissertation', 'book-chapter'): - release_status = "published" + release_stage = "published" else: # unknown - release_status = None + release_stage = None # external identifiers extids = self.lookup_ext_ids(doi=obj['DOI'].lower()) @@ -408,18 +408,20 @@ class CrossrefImporter(EntityImporter): title=title, original_title=original_title, release_type=release_type, - release_status=release_status, + release_stage=release_stage, release_date=release_date, release_year=release_year, publisher=publisher, - doi=obj['DOI'].lower(), - pmid=extids['pmid'], - pmcid=extids['pmcid'], - wikidata_qid=extids['wikidata_qid'], - isbn13=isbn13, - core_id=extids['core_id'], - arxiv_id=extids['arxiv_id'], - jstor_id=extids['jstor_id'], + ext_ids=fatcat_client.ReleaseEntityExtIds( + doi=obj['DOI'].lower(), + pmid=extids['pmid'], + pmcid=extids['pmcid'], + wikidata_qid=extids['wikidata_qid'], + isbn13=isbn13, + core=extids['core_id'], + arxiv=extids['arxiv_id'], + jstor=extids['jstor_id'], + ), volume=clean(obj.get('volume')), issue=clean(obj.get('issue')), pages=clean(obj.get('page')), @@ -437,7 +439,7 @@ class CrossrefImporter(EntityImporter): # lookup existing DOI (don't need to try other ext idents for crossref) existing = None try: - existing = self.api.lookup_release(doi=re.doi) + existing = self.api.lookup_release(doi=re.ext_ids.doi) except fatcat_client.rest.ApiException as err: if err.status != 404: raise err diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 7281a7a1..ba91d183 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -156,6 +156,7 @@ class GrobidMetadataImporter(EntityImporter): volume=clean(obj['journal'].get('volume')), issue=clean(obj['journal'].get('issue')), abstracts=abstracts, + ext_ids=fatcat_client.ReleaseEntityExtIds(), extra=extra) return re diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index 7bc026ed..079e0501 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -109,13 +109,13 @@ def release_to_csl(entity): container_title=entity.container and entity.container.name, #container-title-short #dimensions - DOI=entity.doi, + DOI=entity.ext_ids.doi, #edition #event #event-place #first-reference-note-number #genre - ISBN=entity.isbn13, + ISBN=entity.ext_ids.isbn13, ISSN=entity.container and entity.container.issnl, issue=entity.issue, #jurisdiction @@ -131,8 +131,8 @@ def release_to_csl(entity): #original-title # TODO: page=entity.pages, page_first=entity.pages and entity.pages.split('-')[0], - PMCID=entity.pmcid, - PMID=entity.pmid, + PMCID=entity.ext_ids.pmcid, + PMID=entity.ext_ids.pmid, publisher=(entity.container and entity.container.publisher) or entity.publisher, #publisher-place #references diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 73ac046e..971d1c11 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -45,17 +45,18 @@ def release_to_elasticsearch(entity, force_bool=True): title = release.title, original_title = release.original_title, release_type = release.release_type, - release_status = release.release_status, + release_status = release.release_stage, language = release.language, license = release.license_slug, - doi = release.doi, - pmid = release.pmid, - pmcid = release.pmcid, - isbn13 = release.isbn13, - wikidata_qid = release.wikidata_qid, - core_id = release.core_id, - arxiv_id = release.core_id, - jstor_id = release.jstor_id, + doi = release.ext_ids.doi, + pmid = release.ext_ids.pmid, + pmcid = release.ext_ids.pmcid, + isbn13 = release.ext_ids.isbn13, + wikidata_qid = release.ext_ids.wikidata_qid, + core_id = release.ext_ids.core, + arxiv_id = release.ext_ids.arxiv, + jstor_id = release.ext_ids.jstor, + # TODO: mag, ark ) is_oa = None @@ -121,7 +122,7 @@ def release_to_elasticsearch(entity, force_bool=True): else: t['publisher'] = release.publisher - if release.jstor_id or (release.doi and release.doi.startswith('10.2307/')): + if release.ext_ids.jstor or (release.ext_ids.doi and release.ext_ids.doi.startswith('10.2307/')): in_jstor = True files = release.files or [] diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index 79365687..e22476ce 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -9,7 +9,7 @@ from wtforms import SelectField, DateField, StringField, IntegerField, \ HiddenField, FormField, FieldList, validators from fatcat_client import ContainerEntity, CreatorEntity, FileEntity, \ - ReleaseEntity, ReleaseContrib, FileEntityUrls + ReleaseEntity, ReleaseContrib, FileEntityUrls, ReleaseEntityExtIds release_type_options = [ ('', 'Unknown'), @@ -21,7 +21,7 @@ release_type_options = [ ('dataset', 'Dataset'), ('stub', 'Invalid/Stub'), ] -release_status_options = [ +release_stage_options = [ ('', 'Unknown'), ('draft', 'Draft'), ('submitted', 'Submitted'), @@ -62,9 +62,10 @@ class ReleaseContribForm(FlaskForm): default='author') RELEASE_SIMPLE_ATTRS = ['title', 'original_title', 'work_id', 'container_id', - 'release_type', 'release_status', 'release_date', 'doi', 'wikidata_qid', - 'isbn13', 'pmid', 'pmcid', 'volume', 'issue', 'pages', 'publisher', - 'language', 'license_slug'] + 'release_type', 'release_stage', 'release_date', 'volume', 'issue', + 'pages', 'publisher', 'language', 'license_slug'] + +RELEASE_EXTID_ATTRS = ['doi', 'wikidata_qid', 'isbn13', 'pmid', 'pmcid'] class ReleaseEntityForm(EntityEditForm): """ @@ -85,7 +86,7 @@ class ReleaseEntityForm(EntityEditForm): [validators.DataRequired()], choices=release_type_options, default='') - release_status = SelectField(choices=release_status_options) + release_stage = SelectField(choices=release_stage_options) release_date = DateField('Release Date', [validators.Optional(True)]) #release_year @@ -118,6 +119,9 @@ class ReleaseEntityForm(EntityEditForm): for simple_attr in RELEASE_SIMPLE_ATTRS: a = getattr(ref, simple_attr) a.data = getattr(re, simple_attr) + for extid_attr in RELEASE_EXTID_ATTRS: + a = getattr(ref, extid_attr) + a.data = getattr(re.ext_ids, extid_attr) for i, c in enumerate(re.contribs): rcf = ReleaseContribForm() rcf.prev_index = i @@ -128,7 +132,7 @@ class ReleaseEntityForm(EntityEditForm): def to_entity(self): assert(self.title.data) - entity = ReleaseEntity(title=self.title.data) + entity = ReleaseEntity(title=self.title.data, ext_ids=ReleaseEntityExtIds()) self.update_entity(entity) return entity @@ -145,6 +149,12 @@ class ReleaseEntityForm(EntityEditForm): if a == '': a = None setattr(re, simple_attr, a) + for extid_attr in RELEASE_EXTID_ATTRS: + a = getattr(self, simple_attr).data + # special case blank strings + if a == '': + a = None + setattr(re.ext_ids, simple_attr, a) # bunch of complexity here to preserve old contrib metadata (eg, # affiliation and extra) not included in current forms # TODO: this may be broken; either way needs tests diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 633cbb22..a61c7c6d 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -239,7 +239,7 @@ def webcapture_view(ident): @app.route('/release/lookup', methods=['GET']) def release_lookup(): extid = None - for key in ('doi', 'wikidata_qid', 'pmid', 'pmcid', 'isbn13', 'core_id'): + for key in ('doi', 'wikidata_qid', 'pmid', 'pmcid', 'isbn13', 'core', 'ark', 'mag'): if request.args.get(key): extid = key break diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html index cefb0378..d1bf86c3 100644 --- a/python/fatcat_web/templates/entity_macros.html +++ b/python/fatcat_web/templates/entity_macros.html @@ -60,12 +60,12 @@ {% if release.release_date %}{{ release.release_date }}{% elif release.release_year %}{{ release.release_year }}{% else %}unknown{% endif %} {{ release.title }} -
{{ release.release_status or "unknown status" }} +
{{ release.release_stage or "unknown status" }} | {{ release.release_type or "unknown type" }} {% if release.license_slug %} | {{ release.license_slug }}{% endif %} - {% if release.doi %} -
doi:{{ release.doi }} + {% if release.ext_ids.doi %} +
doi:{{ release.ext_ids.doi }} {% endif %} {% endfor %} diff --git a/python/fatcat_web/templates/release_edit.html b/python/fatcat_web/templates/release_edit.html index 7bd4a00a..b3beec2b 100644 --- a/python/fatcat_web/templates/release_edit.html +++ b/python/fatcat_web/templates/release_edit.html @@ -20,7 +20,7 @@
{{ edit_macros.form_field_basic(form.release_type, "required") }} - {{ edit_macros.form_field_basic(form.release_status) }} + {{ edit_macros.form_field_basic(form.release_stage) }}
@@ -133,7 +133,7 @@ $(document).ready(function() { // these javascript dropdowns hide the original , which breaks browser // form focusing (eg, for required fields) :( //$('#release_type').dropdown(); - //$('#release_status').dropdown(); + //$('#release_stage').dropdown(); $('.ui.accordion').accordion(); var fixup_contrib_numbering = function(group_item) { diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index 403af653..820c2797 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -55,13 +55,13 @@ {% endif %} - {% if release.doi %} - - + {% if release.ext_ids.doi %} + + {% endif %} - {% if release.isbn13 %} - - + {% if release.ext_ids.isbn13 %} + + {% endif %} {% for file in (entity.files or [])[:10] %}{% for url in (file.urls or [])[:10] %} {% if '//web.archive.org/web/' in url.url %} @@ -101,7 +101,7 @@
- {% if release.release_status == 'published' %} + {% if release.release_stage == 'published' %} Published in {{ container.name }} {% else %} Released as a {{ release.release_type }} @@ -323,13 +323,13 @@ No Full Text Available {% endif %} -{% if release.release_type or release.release_status or release.release_year %} +{% if release.release_type or release.release_stage or release.release_year %}
{% if release.release_type %} Type  {{ release.release_type }}
{% endif %} - {% if release.release_status %} - Status   {{ release.release_status or 'unknown' }}
+ {% if release.release_stage %} + Status   {{ release.release_stage or 'unknown' }}
{% endif %} {% if release.release_date %} @@ -341,22 +341,22 @@
{% endif %} -{% if release.doi or release.pmid or release.pmcid or release.wikidata_qid %} +{% if release.ext_ids.doi or release.ext_ids.pmid or release.ext_ids.pmcid or release.ext_ids.wikidata_qid %}
-{% if release.doi %} - DOI  {{ release.doi }}
+{% if release.ext_ids.doi %} + DOI  {{ release.ext_ids.doi }}
{% endif %} -{% if release.pmid != None %} - PubMed  {{ release.pmid }}
+{% if release.ext_ids.pmid != None %} + PubMed  {{ release.ext_ids.pmid }}
{% endif %} -{% if release.pmcid != None %} - PMC  {{ release.pmcid }}
+{% if release.ext_ids.pmcid != None %} + PMC  {{ release.ext_ids.pmcid }}
{% endif %} -{% if release.wikidata_qid != None %} - Wikidata  {{ release.wikidata_qid }}
+{% if release.ext_ids.wikidata_qid != None %} + Wikidata  {{ release.ext_ids.wikidata_qid }}
{% endif %} -{% if release.isbn13 != None %} - ISBN-13  {{ release.isbn13 }} +{% if release.ext_ids.isbn13 != None %} + ISBN-13  {{ release.ext_ids.isbn13 }} {% endif %}
{% endif %} @@ -429,17 +429,17 @@ {% if container != None and container.issnl != None %} SHERPA/RoMEO (journal policies)
{% endif %} - {% if container != None and container.doi != None %} - oaDOI/unpaywall
+ {% if release != None and release.ext_ids.doi != None %} + oaDOI/unpaywall
{% endif %} - {% if release.isbn13 != None %} - Open Library
- Worldcat
+ {% if release.ext_ids.isbn13 != None %} + Open Library
+ Worldcat
{% else %} Worldcat
{% endif %} - {% if release.doi %} - Crossref Metadata (via API)
+ {% if release.ext_ids.doi %} + Crossref Metadata (via API)
{% endif %} wikidata.org
CORE.ac.uk
-- cgit v1.2.3