aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-13 14:23:02 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-13 14:23:06 -0700
commit: 955d4942d5111cbbd8f4b99d4e3c6d79262dab6e (patch)
tree: 87757a7afdf00ab6de5ed7455267654bf49b5239 /python
parent: 287e892abb4adac8ea7f7211524a41c7cd06851c (diff)
download: fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.tar.gz
fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.zip
partial python impl of ext_id and release_stage refactors
Diffstat (limited to 'python')
-rwxr-xr-xpython/fatcat_tools/importers/cdl_dash_dat.py8
-rw-r--r--python/fatcat_tools/importers/crossref.py26
-rw-r--r--python/fatcat_tools/importers/grobid_metadata.py1
-rw-r--r--python/fatcat_tools/transforms/csl.py8
-rw-r--r--python/fatcat_tools/transforms/elasticsearch.py21
-rw-r--r--python/fatcat_web/forms.py24
-rw-r--r--python/fatcat_web/routes.py2
-rw-r--r--python/fatcat_web/templates/entity_macros.html6
-rw-r--r--python/fatcat_web/templates/release_edit.html4
-rw-r--r--python/fatcat_web/templates/release_view.html56
10 files changed, 86 insertions, 70 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index 1a21194d..e1252b6d 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -96,7 +96,9 @@ def cdl_dash_release(meta, extra=None):
))
r = ReleaseEntity(
- doi=doi,
+ ext_ids=ReleaseEntityExtIds(
+ doi=doi,
+ ),
title=clean(meta['title'], force_xml=True),
publisher=clean(meta['publisher']),
release_year=int(meta['publicationYear']),
@@ -170,9 +172,9 @@ def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None):
agent="fatcat_tools.auto_cdl_dash_dat")))
editgroup_id = eg.editgroup_id
- if not release_id and release.doi:
+ if not release_id and release.ext_ids.doi:
try:
- r = api.lookup_release(doi=release.doi)
+ r = api.lookup_release(doi=release.ext_ids.doi)
release_id = r.ident
except fatcat_client.rest.ApiException:
pass
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 722f93aa..999ce13f 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -355,10 +355,10 @@ class CrossrefImporter(EntityImporter):
# release status
if obj['type'] in ('journal-article', 'conference-proceeding', 'book',
'dissertation', 'book-chapter'):
- release_status = "published"
+ release_stage = "published"
else:
# unknown
- release_status = None
+ release_stage = None
# external identifiers
extids = self.lookup_ext_ids(doi=obj['DOI'].lower())
@@ -408,18 +408,20 @@ class CrossrefImporter(EntityImporter):
title=title,
original_title=original_title,
release_type=release_type,
- release_status=release_status,
+ release_stage=release_stage,
release_date=release_date,
release_year=release_year,
publisher=publisher,
- doi=obj['DOI'].lower(),
- pmid=extids['pmid'],
- pmcid=extids['pmcid'],
- wikidata_qid=extids['wikidata_qid'],
- isbn13=isbn13,
- core_id=extids['core_id'],
- arxiv_id=extids['arxiv_id'],
- jstor_id=extids['jstor_id'],
+ ext_ids=fatcat_client.ReleaseEntityExtIds(
+ doi=obj['DOI'].lower(),
+ pmid=extids['pmid'],
+ pmcid=extids['pmcid'],
+ wikidata_qid=extids['wikidata_qid'],
+ isbn13=isbn13,
+ core=extids['core_id'],
+ arxiv=extids['arxiv_id'],
+ jstor=extids['jstor_id'],
+ ),
volume=clean(obj.get('volume')),
issue=clean(obj.get('issue')),
pages=clean(obj.get('page')),
@@ -437,7 +439,7 @@ class CrossrefImporter(EntityImporter):
# lookup existing DOI (don't need to try other ext idents for crossref)
existing = None
try:
- existing = self.api.lookup_release(doi=re.doi)
+ existing = self.api.lookup_release(doi=re.ext_ids.doi)
except fatcat_client.rest.ApiException as err:
if err.status != 404:
raise err
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 7281a7a1..ba91d183 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -156,6 +156,7 @@ class GrobidMetadataImporter(EntityImporter):
volume=clean(obj['journal'].get('volume')),
issue=clean(obj['journal'].get('issue')),
abstracts=abstracts,
+ ext_ids=fatcat_client.ReleaseEntityExtIds(),
extra=extra)
return re
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index 7bc026ed..079e0501 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -109,13 +109,13 @@ def release_to_csl(entity):
container_title=entity.container and entity.container.name,
#container-title-short
#dimensions
- DOI=entity.doi,
+ DOI=entity.ext_ids.doi,
#edition
#event
#event-place
#first-reference-note-number
#genre
- ISBN=entity.isbn13,
+ ISBN=entity.ext_ids.isbn13,
ISSN=entity.container and entity.container.issnl,
issue=entity.issue,
#jurisdiction
@@ -131,8 +131,8 @@ def release_to_csl(entity):
#original-title
# TODO: page=entity.pages,
page_first=entity.pages and entity.pages.split('-')[0],
- PMCID=entity.pmcid,
- PMID=entity.pmid,
+ PMCID=entity.ext_ids.pmcid,
+ PMID=entity.ext_ids.pmid,
publisher=(entity.container and entity.container.publisher) or entity.publisher,
#publisher-place
#references
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 73ac046e..971d1c11 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -45,17 +45,18 @@ def release_to_elasticsearch(entity, force_bool=True):
title = release.title,
original_title = release.original_title,
release_type = release.release_type,
- release_status = release.release_status,
+ release_status = release.release_stage,
language = release.language,
license = release.license_slug,
- doi = release.doi,
- pmid = release.pmid,
- pmcid = release.pmcid,
- isbn13 = release.isbn13,
- wikidata_qid = release.wikidata_qid,
- core_id = release.core_id,
- arxiv_id = release.core_id,
- jstor_id = release.jstor_id,
+ doi = release.ext_ids.doi,
+ pmid = release.ext_ids.pmid,
+ pmcid = release.ext_ids.pmcid,
+ isbn13 = release.ext_ids.isbn13,
+ wikidata_qid = release.ext_ids.wikidata_qid,
+ core_id = release.ext_ids.core,
+ arxiv_id = release.ext_ids.arxiv,
+ jstor_id = release.ext_ids.jstor,
+ # TODO: mag, ark
)
is_oa = None
@@ -121,7 +122,7 @@ def release_to_elasticsearch(entity, force_bool=True):
else:
t['publisher'] = release.publisher
- if release.jstor_id or (release.doi and release.doi.startswith('10.2307/')):
+ if release.ext_ids.jstor or (release.ext_ids.doi and release.ext_ids.doi.startswith('10.2307/')):
in_jstor = True
files = release.files or []
diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index 79365687..e22476ce 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -9,7 +9,7 @@ from wtforms import SelectField, DateField, StringField, IntegerField, \
HiddenField, FormField, FieldList, validators
from fatcat_client import ContainerEntity, CreatorEntity, FileEntity, \
- ReleaseEntity, ReleaseContrib, FileEntityUrls
+ ReleaseEntity, ReleaseContrib, FileEntityUrls, ReleaseEntityExtIds
release_type_options = [
('', 'Unknown'),
@@ -21,7 +21,7 @@ release_type_options = [
('dataset', 'Dataset'),
('stub', 'Invalid/Stub'),
]
-release_status_options = [
+release_stage_options = [
('', 'Unknown'),
('draft', 'Draft'),
('submitted', 'Submitted'),
@@ -62,9 +62,10 @@ class ReleaseContribForm(FlaskForm):
default='author')
RELEASE_SIMPLE_ATTRS = ['title', 'original_title', 'work_id', 'container_id',
- 'release_type', 'release_status', 'release_date', 'doi', 'wikidata_qid',
- 'isbn13', 'pmid', 'pmcid', 'volume', 'issue', 'pages', 'publisher',
- 'language', 'license_slug']
+ 'release_type', 'release_stage', 'release_date', 'volume', 'issue',
+ 'pages', 'publisher', 'language', 'license_slug']
+
+RELEASE_EXTID_ATTRS = ['doi', 'wikidata_qid', 'isbn13', 'pmid', 'pmcid']
class ReleaseEntityForm(EntityEditForm):
"""
@@ -85,7 +86,7 @@ class ReleaseEntityForm(EntityEditForm):
[validators.DataRequired()],
choices=release_type_options,
default='')
- release_status = SelectField(choices=release_status_options)
+ release_stage = SelectField(choices=release_stage_options)
release_date = DateField('Release Date',
[validators.Optional(True)])
#release_year
@@ -118,6 +119,9 @@ class ReleaseEntityForm(EntityEditForm):
for simple_attr in RELEASE_SIMPLE_ATTRS:
a = getattr(ref, simple_attr)
a.data = getattr(re, simple_attr)
+ for extid_attr in RELEASE_EXTID_ATTRS:
+ a = getattr(ref, extid_attr)
+ a.data = getattr(re.ext_ids, extid_attr)
for i, c in enumerate(re.contribs):
rcf = ReleaseContribForm()
rcf.prev_index = i
@@ -128,7 +132,7 @@ class ReleaseEntityForm(EntityEditForm):
def to_entity(self):
assert(self.title.data)
- entity = ReleaseEntity(title=self.title.data)
+ entity = ReleaseEntity(title=self.title.data, ext_ids=ReleaseEntityExtIds())
self.update_entity(entity)
return entity
@@ -145,6 +149,12 @@ class ReleaseEntityForm(EntityEditForm):
if a == '':
a = None
setattr(re, simple_attr, a)
+ for extid_attr in RELEASE_EXTID_ATTRS:
+ a = getattr(self, extid_attr).data
+ # blank form input means "no identifier": store None, not ''
+ if a == '':
+ a = None
+ setattr(re.ext_ids, extid_attr, a)
# bunch of complexity here to preserve old contrib metadata (eg,
# affiliation and extra) not included in current forms
# TODO: this may be broken; either way needs tests
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 633cbb22..a61c7c6d 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -239,7 +239,7 @@ def webcapture_view(ident):
@app.route('/release/lookup', methods=['GET'])
def release_lookup():
extid = None
- for key in ('doi', 'wikidata_qid', 'pmid', 'pmcid', 'isbn13', 'core_id'):
+ for key in ('doi', 'wikidata_qid', 'pmid', 'pmcid', 'isbn13', 'core', 'ark', 'mag'):
if request.args.get(key):
extid = key
break
diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html
index cefb0378..d1bf86c3 100644
--- a/python/fatcat_web/templates/entity_macros.html
+++ b/python/fatcat_web/templates/entity_macros.html
@@ -60,12 +60,12 @@
{% if release.release_date %}{{ release.release_date }}{% elif release.release_year %}{{ release.release_year }}{% else %}<i>unknown</i>{% endif %}
<td class="">
<b><a href="/release/{{ release.ident }}">{{ release.title }}</a></b>
- <br><small>{{ release.release_status or "unknown status" }}
+ <br><small>{{ release.release_stage or "unknown status" }}
| {{ release.release_type or "unknown type" }}
{% if release.license_slug %} | {{ release.license_slug }}{% endif %}
</small>
- {% if release.doi %}
- <br><a href="https://doi.org/{{ release.doi }}" style="color:green;">doi:{{ release.doi }}</a>
+ {% if release.ext_ids.doi %}
+ <br><a href="https://doi.org/{{ release.ext_ids.doi }}" style="color:green;">doi:{{ release.ext_ids.doi }}</a>
{% endif %}
{% endfor %}
</tbody>
diff --git a/python/fatcat_web/templates/release_edit.html b/python/fatcat_web/templates/release_edit.html
index 7bd4a00a..b3beec2b 100644
--- a/python/fatcat_web/templates/release_edit.html
+++ b/python/fatcat_web/templates/release_edit.html
@@ -20,7 +20,7 @@
<div class="twelve wide column" style="padding-bottom: 0px;">
<div class="ui equal width fields">
{{ edit_macros.form_field_basic(form.release_type, "required") }}
- {{ edit_macros.form_field_basic(form.release_status) }}
+ {{ edit_macros.form_field_basic(form.release_stage) }}
</div>
</div>
<div class="one wide column" style="padding-bottom: 0px;"></div>
@@ -133,7 +133,7 @@ $(document).ready(function() {
// these javascript dropdowns hide the original <input>, which breaks browser
// form focusing (eg, for required fields) :(
//$('#release_type').dropdown();
- //$('#release_status').dropdown();
+ //$('#release_stage').dropdown();
$('.ui.accordion').accordion();
var fixup_contrib_numbering = function(group_item) {
diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html
index 403af653..820c2797 100644
--- a/python/fatcat_web/templates/release_view.html
+++ b/python/fatcat_web/templates/release_view.html
@@ -55,13 +55,13 @@
<meta name="DC.citation.spage" content="{{ release.pages }}">
<meta name="citation_first_page" content="{{ release.pages }}">
{% endif %}
- {% if release.doi %}
- <meta name="DC.identifier" content="doi:{{ release.doi }}">
- <meta name="citation_doi" content="{{ release.doi }}">
+ {% if release.ext_ids.doi %}
+ <meta name="DC.identifier" content="doi:{{ release.ext_ids.doi }}">
+ <meta name="citation_doi" content="{{ release.ext_ids.doi }}">
{% endif %}
- {% if release.isbn13 %}
- <meta name="DC.identifier" content="isbn:{{ release.isbn13}}">
- <meta name="citation_isbn" content="{{ release.isbn13}}">
+ {% if release.ext_ids.isbn13 %}
+ <meta name="DC.identifier" content="isbn:{{ release.ext_ids.isbn13}}">
+ <meta name="citation_isbn" content="{{ release.ext_ids.isbn13}}">
{% endif %}
{% for file in (entity.files or [])[:10] %}{% for url in (file.urls or [])[:10] %}
{% if '//web.archive.org/web/' in url.url %}
@@ -101,7 +101,7 @@
<div class="ui accordion">
<div class="title" itemprop="isPartOf" itemscope itemtype="http://schema.org/Periodical" itemid="#container">
- {% if release.release_status == 'published' %}
+ {% if release.release_stage == 'published' %}
<i class="dropdown icon"></i>Published in <a href="/container/{{ container.ident }}"><span itemprop="name">{{ container.name }}</span></a>
{% else %}
<i class="dropdown icon"></i>Released as a <i>{{ release.release_type }}</i>
@@ -323,13 +323,13 @@
<span class="ui top attached fluid huge grey button"><i class="file cross icon"></i>No Full Text Available</span>
{% endif %}
-{% if release.release_type or release.release_status or release.release_year %}
+{% if release.release_type or release.release_stage or release.release_year %}
<div class="ui segment attached">
{% if release.release_type %}
<b>Type</b> &nbsp;<code>{{ release.release_type }}</code><br>
{% endif %}
- {% if release.release_status %}
- <b>Status</b> &nbsp; <code>{{ release.release_status or 'unknown' }}</code><br>
+ {% if release.release_stage %}
+ <b>Status</b> &nbsp; <code>{{ release.release_stage or 'unknown' }}</code><br>
{% endif %}
{% if release.release_date %}
<meta itemprop="datePublished" content="{{ release.release_date }}">
@@ -341,22 +341,22 @@
</div>
{% endif %}
-{% if release.doi or release.pmid or release.pmcid or release.wikidata_qid %}
+{% if release.ext_ids.doi or release.ext_ids.pmid or release.ext_ids.pmcid or release.ext_ids.wikidata_qid %}
<div class="ui segment attached" style="word-wrap: break-word;">
-{% if release.doi %}
- <b>DOI </b> &nbsp;<a href="https://doi.org/{{ release.doi }}" title="{{ release.doi }}" itemprop="sameAs">{{ release.doi }}</a><br>
+{% if release.ext_ids.doi %}
+ <b>DOI </b> &nbsp;<a href="https://doi.org/{{ release.ext_ids.doi }}" title="{{ release.ext_ids.doi }}" itemprop="sameAs">{{ release.ext_ids.doi }}</a><br>
{% endif %}
-{% if release.pmid != None %}
- <b>PubMed</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.pmid }}">&nbsp;<code>{{ release.pmid }}</code></a><br>
+{% if release.ext_ids.pmid != None %}
+ <b>PubMed</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.ext_ids.pmid }}">&nbsp;<code>{{ release.ext_ids.pmid }}</code></a><br>
{% endif %}
-{% if release.pmcid != None %}
- <b>PMC</b> <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/{{ release.pmcid }}">&nbsp;<code>{{ release.pmcid }}</code></a><br>
+{% if release.ext_ids.pmcid != None %}
+ <b>PMC</b> <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/{{ release.ext_ids.pmcid }}">&nbsp;<code>{{ release.ext_ids.pmcid }}</code></a><br>
{% endif %}
-{% if release.wikidata_qid != None %}
- <b>Wikidata</b> <a href="https://www.wikidata.org/wiki/{{ release.wikidata_qid }}">&nbsp;<code>{{ release.wikidata_qid }}</code></a><br>
+{% if release.ext_ids.wikidata_qid != None %}
+ <b>Wikidata</b> <a href="https://www.wikidata.org/wiki/{{ release.ext_ids.wikidata_qid }}">&nbsp;<code>{{ release.ext_ids.wikidata_qid }}</code></a><br>
{% endif %}
-{% if release.isbn13 != None %}
- <b>ISBN-13 </b> &nbsp;<code>{{ release.isbn13 }}</code>
+{% if release.ext_ids.isbn13 != None %}
+ <b>ISBN-13 </b> &nbsp;<code>{{ release.ext_ids.isbn13 }}</code>
{% endif %}
</div>
{% endif %}
@@ -429,17 +429,17 @@
{% if container != None and container.issnl != None %}
<a href="http://www.sherpa.ac.uk/romeo/issn/{{ container.issnl }}/">SHERPA/RoMEO</a> (journal policies)<br/>
{% endif %}
- {% if container != None and container.doi != None %}
- <a href="https://oadoi.org/{{ release.doi }}">oaDOI/unpaywall</a><br/>
+ {% if release != None and release.ext_ids.doi != None %}
+ <a href="https://oadoi.org/{{ release.ext_ids.doi }}">oaDOI/unpaywall</a><br/>
{% endif %}
- {% if release.isbn13 != None %}
- <a href="https://openlibrary.org/search?isbn={{ release.isbn13 }}">Open Library</a><br>
- <a href="http://www.worldcat.org/search?q=bn%3A{{ release.isbn13 }}">Worldcat</a><br>
+ {% if release.ext_ids.isbn13 != None %}
+ <a href="https://openlibrary.org/search?isbn={{ release.ext_ids.isbn13 }}">Open Library</a><br>
+ <a href="http://www.worldcat.org/search?q=bn%3A{{ release.ext_ids.isbn13 }}">Worldcat</a><br>
{% else %}
<a href="http://www.worldcat.org/search?qt=worldcat_org_art&q={{ release.title|urlencode }}">Worldcat</a><br>
{% endif %}
- {% if release.doi %}
- <a href="https://api.crossref.org/v1/works/http://dx.doi.org/{{ release.doi }}">Crossref Metadata</a> (via API)<br>
+ {% if release.ext_ids.doi %}
+ <a href="https://api.crossref.org/v1/works/http://dx.doi.org/{{ release.ext_ids.doi }}">Crossref Metadata</a> (via API)<br>
{% endif %}
<a href="https://www.wikidata.org/w/index.php?search={{ release.title|urlencode }}">wikidata.org</a><br>
<a href="https://core.ac.uk/search?q={{ release.title|urlencode }}">CORE.ac.uk</a><br>