diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-13 14:23:02 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-13 14:23:06 -0700 | 
| commit | 955d4942d5111cbbd8f4b99d4e3c6d79262dab6e (patch) | |
| tree | 87757a7afdf00ab6de5ed7455267654bf49b5239 | |
| parent | 287e892abb4adac8ea7f7211524a41c7cd06851c (diff) | |
| download | fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.tar.gz fatcat-955d4942d5111cbbd8f4b99d4e3c6d79262dab6e.zip | |
partial python impl of ext_id and release_stage refactors
| -rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 8 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 26 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 1 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms/csl.py | 8 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 21 | ||||
| -rw-r--r-- | python/fatcat_web/forms.py | 24 | ||||
| -rw-r--r-- | python/fatcat_web/routes.py | 2 | ||||
| -rw-r--r-- | python/fatcat_web/templates/entity_macros.html | 6 | ||||
| -rw-r--r-- | python/fatcat_web/templates/release_edit.html | 4 | ||||
| -rw-r--r-- | python/fatcat_web/templates/release_view.html | 56 | 
10 files changed, 86 insertions, 70 deletions
| diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 1a21194d..e1252b6d 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -96,7 +96,9 @@ def cdl_dash_release(meta, extra=None):          ))      r = ReleaseEntity( -        doi=doi, +        ext_ids=ReleaseEntityExtIds( +            doi=doi, +        ),          title=clean(meta['title'], force_xml=True),          publisher=clean(meta['publisher']),          release_year=int(meta['publicationYear']), @@ -170,9 +172,9 @@ def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None):                  agent="fatcat_tools.auto_cdl_dash_dat")))          editgroup_id = eg.editgroup_id -    if not release_id and release.doi: +    if not release_id and release.ext_ids.doi:          try: -            r = api.lookup_release(doi=release.doi) +            r = api.lookup_release(doi=release.ext_ids.doi)              release_id = r.ident          except fatcat_client.rest.ApiException:              pass diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 722f93aa..999ce13f 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -355,10 +355,10 @@ class CrossrefImporter(EntityImporter):          # release status          if obj['type'] in ('journal-article', 'conference-proceeding', 'book',                  'dissertation', 'book-chapter'): -            release_status = "published" +            release_stage = "published"          else:              # unknown -            release_status = None +            release_stage = None          # external identifiers          extids = self.lookup_ext_ids(doi=obj['DOI'].lower()) @@ -408,18 +408,20 @@ class CrossrefImporter(EntityImporter):              title=title,              original_title=original_title,              release_type=release_type, -            release_status=release_status, +            release_stage=release_stage,              release_date=release_date,              release_year=release_year,              publisher=publisher, -            doi=obj['DOI'].lower(), -            pmid=extids['pmid'], -            pmcid=extids['pmcid'], -            wikidata_qid=extids['wikidata_qid'], -            isbn13=isbn13, -            core_id=extids['core_id'], -            arxiv_id=extids['arxiv_id'], -            jstor_id=extids['jstor_id'], +            ext_ids=fatcat_client.ReleaseEntityExtIds( +                doi=obj['DOI'].lower(), +                pmid=extids['pmid'], +                pmcid=extids['pmcid'], +                wikidata_qid=extids['wikidata_qid'], +                isbn13=isbn13, +                core=extids['core_id'], +                arxiv=extids['arxiv_id'], +                jstor=extids['jstor_id'], +            ),              volume=clean(obj.get('volume')),              issue=clean(obj.get('issue')),              pages=clean(obj.get('page')), @@ -437,7 +439,7 @@ class CrossrefImporter(EntityImporter):          # lookup existing DOI (don't need to try other ext idents for crossref)          existing = None          try: -            existing = self.api.lookup_release(doi=re.doi) +            existing = self.api.lookup_release(doi=re.ext_ids.doi)          except fatcat_client.rest.ApiException as err:              if err.status != 404:                  raise err diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 7281a7a1..ba91d183 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -156,6 +156,7 @@ class GrobidMetadataImporter(EntityImporter):              volume=clean(obj['journal'].get('volume')),              issue=clean(obj['journal'].get('issue')),              abstracts=abstracts, +            ext_ids=fatcat_client.ReleaseEntityExtIds(),              extra=extra)          return re diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index 7bc026ed..079e0501 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -109,13 +109,13 @@ def release_to_csl(entity):          container_title=entity.container and entity.container.name,          #container-title-short          #dimensions -        DOI=entity.doi, +        DOI=entity.ext_ids.doi,          #edition          #event          #event-place          #first-reference-note-number          #genre -        ISBN=entity.isbn13, +        ISBN=entity.ext_ids.isbn13,          ISSN=entity.container and entity.container.issnl,          issue=entity.issue,          #jurisdiction @@ -131,8 +131,8 @@ def release_to_csl(entity):          #original-title          # TODO: page=entity.pages,          page_first=entity.pages and entity.pages.split('-')[0], -        PMCID=entity.pmcid, -        PMID=entity.pmid, +        PMCID=entity.ext_ids.pmcid, +        PMID=entity.ext_ids.pmid,          publisher=(entity.container and entity.container.publisher) or entity.publisher,          #publisher-place          #references diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 73ac046e..971d1c11 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -45,17 +45,18 @@ def release_to_elasticsearch(entity, force_bool=True):          title = release.title,          original_title = release.original_title,          release_type = release.release_type, -        release_status = release.release_status, +        release_status = release.release_stage,          language = release.language,          license = release.license_slug, -        doi = release.doi, -        pmid = release.pmid, -        pmcid = release.pmcid, -        isbn13 = release.isbn13, -        wikidata_qid = release.wikidata_qid, -        core_id = release.core_id, -        arxiv_id = release.core_id, -        jstor_id = release.jstor_id, +        doi = release.ext_ids.doi, +        pmid = release.ext_ids.pmid, +        pmcid = release.ext_ids.pmcid, +        isbn13 = release.ext_ids.isbn13, +        wikidata_qid = release.ext_ids.wikidata_qid, +        core_id = release.ext_ids.core, +        arxiv_id = release.ext_ids.arxiv, +        jstor_id = release.ext_ids.jstor, +        # TODO: mag, ark      )      is_oa = None @@ -121,7 +122,7 @@ def release_to_elasticsearch(entity, force_bool=True):      else:          t['publisher'] = release.publisher -    if release.jstor_id or (release.doi and release.doi.startswith('10.2307/')): +    if release.ext_ids.jstor or (release.ext_ids.doi and release.ext_ids.doi.startswith('10.2307/')):          in_jstor = True      files = release.files or [] diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index 79365687..e22476ce 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -9,7 +9,7 @@ from wtforms import SelectField, DateField, StringField, IntegerField, \      HiddenField, FormField, FieldList, validators  from fatcat_client import ContainerEntity, CreatorEntity, FileEntity, \ -    ReleaseEntity, ReleaseContrib, FileEntityUrls +    ReleaseEntity, ReleaseContrib, FileEntityUrls, ReleaseEntityExtIds  release_type_options = [      ('', 'Unknown'), @@ -21,7 +21,7 @@ release_type_options = [      ('dataset', 'Dataset'),      ('stub', 'Invalid/Stub'),  ] -release_status_options = [ +release_stage_options = [      ('', 'Unknown'),      ('draft', 'Draft'),      ('submitted', 'Submitted'), @@ -62,9 +62,10 @@ class ReleaseContribForm(FlaskForm):          default='author')  RELEASE_SIMPLE_ATTRS = ['title', 'original_title', 'work_id', 'container_id', -    'release_type', 'release_status', 'release_date', 'doi', 'wikidata_qid', -    'isbn13', 'pmid', 'pmcid', 'volume', 'issue', 'pages', 'publisher', -    'language', 'license_slug'] +    'release_type', 'release_stage', 'release_date', 'volume', 'issue', +    'pages', 'publisher', 'language', 'license_slug'] + +RELEASE_EXTID_ATTRS = ['doi', 'wikidata_qid', 'isbn13', 'pmid', 'pmcid']  class ReleaseEntityForm(EntityEditForm):      """ @@ -85,7 +86,7 @@ class ReleaseEntityForm(EntityEditForm):          [validators.DataRequired()],          choices=release_type_options,          default='') -    release_status = SelectField(choices=release_status_options) +    release_stage = SelectField(choices=release_stage_options)      release_date = DateField('Release Date',          [validators.Optional(True)])      #release_year @@ -118,6 +119,9 @@ class ReleaseEntityForm(EntityEditForm):          for simple_attr in RELEASE_SIMPLE_ATTRS:              a = getattr(ref, simple_attr)              a.data = getattr(re, simple_attr) +        for extid_attr in RELEASE_EXTID_ATTRS: +            a = getattr(ref, extid_attr) +            a.data = getattr(re.ext_ids, extid_attr)          for i, c in enumerate(re.contribs):              rcf = ReleaseContribForm()              rcf.prev_index = i @@ -128,7 +132,7 @@ class ReleaseEntityForm(EntityEditForm):      def to_entity(self):          assert(self.title.data) -        entity = ReleaseEntity(title=self.title.data) +        entity = ReleaseEntity(title=self.title.data, ext_ids=ReleaseEntityExtIds())          self.update_entity(entity)          return entity @@ -145,6 +149,12 @@ class ReleaseEntityForm(EntityEditForm):              if a == '':                  a = None              setattr(re, simple_attr, a) +        for extid_attr in RELEASE_EXTID_ATTRS: +            a = getattr(self, simple_attr).data +            # special case blank strings +            if a == '': +                a = None +            setattr(re.ext_ids, simple_attr, a)          # bunch of complexity here to preserve old contrib metadata (eg,          # affiliation and extra) not included in current forms          # TODO: this may be broken; either way needs tests diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 633cbb22..a61c7c6d 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -239,7 +239,7 @@ def webcapture_view(ident):  @app.route('/release/lookup', methods=['GET'])  def release_lookup():      extid = None -    for key in ('doi', 'wikidata_qid', 'pmid', 'pmcid', 'isbn13', 'core_id'): +    for key in ('doi', 'wikidata_qid', 'pmid', 'pmcid', 'isbn13', 'core', 'ark', 'mag'):          if request.args.get(key):              extid = key              break diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html index cefb0378..d1bf86c3 100644 --- a/python/fatcat_web/templates/entity_macros.html +++ b/python/fatcat_web/templates/entity_macros.html @@ -60,12 +60,12 @@          {% if release.release_date %}{{ release.release_date }}{% elif release.release_year %}{{ release.release_year }}{% else %}<i>unknown</i>{% endif %}        <td class="">          <b><a href="/release/{{ release.ident }}">{{ release.title }}</a></b> -        <br><small>{{ release.release_status or "unknown status" }} +        <br><small>{{ release.release_stage or "unknown status" }}              | {{ release.release_type or "unknown type" }}              {% if release.license_slug %} | {{ release.license_slug }}{% endif %}          </small> -        {% if release.doi %} -          <br><a href="https://doi.org/{{ release.doi }}" style="color:green;">doi:{{ release.doi }}</a> +        {% if release.ext_ids.doi %} +          <br><a href="https://doi.org/{{ release.ext_ids.doi }}" style="color:green;">doi:{{ release.ext_ids.doi }}</a>          {% endif %}  {% endfor %}  </tbody> diff --git a/python/fatcat_web/templates/release_edit.html b/python/fatcat_web/templates/release_edit.html index 7bd4a00a..b3beec2b 100644 --- a/python/fatcat_web/templates/release_edit.html +++ b/python/fatcat_web/templates/release_edit.html @@ -20,7 +20,7 @@      <div class="twelve wide column" style="padding-bottom: 0px;">        <div class="ui equal width fields">          {{ edit_macros.form_field_basic(form.release_type, "required") }} -        {{ edit_macros.form_field_basic(form.release_status) }} +        {{ edit_macros.form_field_basic(form.release_stage) }}        </div>      </div>      <div class="one wide column" style="padding-bottom: 0px;"></div> @@ -133,7 +133,7 @@ $(document).ready(function() {    // these javascript dropdowns hide the original <input>, which breaks browser    // form focusing (eg, for required fields) :(    //$('#release_type').dropdown(); -  //$('#release_status').dropdown(); +  //$('#release_stage').dropdown();    $('.ui.accordion').accordion();    var fixup_contrib_numbering = function(group_item) { diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index 403af653..820c2797 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -55,13 +55,13 @@        <meta name="DC.citation.spage" content="{{ release.pages }}">        <meta name="citation_first_page" content="{{ release.pages }}">      {% endif %} -    {% if release.doi %} -      <meta name="DC.identifier" content="doi:{{ release.doi }}"> -      <meta name="citation_doi" content="{{ release.doi }}"> +    {% if release.ext_ids.doi %} +      <meta name="DC.identifier" content="doi:{{ release.ext_ids.doi }}"> +      <meta name="citation_doi" content="{{ release.ext_ids.doi }}">      {% endif %} -    {% if release.isbn13 %} -      <meta name="DC.identifier" content="isbn:{{ release.isbn13}}"> -      <meta name="citation_isbn" content="{{ release.isbn13}}"> +    {% if release.ext_ids.isbn13 %} +      <meta name="DC.identifier" content="isbn:{{ release.ext_ids.isbn13}}"> +      <meta name="citation_isbn" content="{{ release.ext_ids.isbn13}}">      {% endif %}      {% for file in (entity.files or [])[:10] %}{% for url in (file.urls or [])[:10] %}        {% if '//web.archive.org/web/' in url.url %} @@ -101,7 +101,7 @@  <div class="ui accordion">  <div class="title" itemprop="isPartOf" itemscope itemtype="http://schema.org/Periodical" itemid="#container"> -  {% if release.release_status == 'published' %} +  {% if release.release_stage == 'published' %}      <i class="dropdown icon"></i>Published in <a href="/container/{{ container.ident }}"><span itemprop="name">{{ container.name }}</span></a>    {% else %}      <i class="dropdown icon"></i>Released as a <i>{{ release.release_type }}</i> @@ -323,13 +323,13 @@  <span class="ui top attached fluid huge grey button"><i class="file cross icon"></i>No Full Text Available</span>  {% endif %} -{% if release.release_type or release.release_status or release.release_year %} +{% if release.release_type or release.release_stage or release.release_year %}    <div class="ui segment attached">    {% if release.release_type %}      <b>Type</b>  <code>{{ release.release_type }}</code><br>    {% endif %} -  {% if release.release_status %} -    <b>Status</b>   <code>{{ release.release_status or 'unknown' }}</code><br> +  {% if release.release_stage %} +    <b>Status</b>   <code>{{ release.release_stage or 'unknown' }}</code><br>    {% endif %}    {% if release.release_date %}      <meta itemprop="datePublished" content="{{ release.release_date }}"> @@ -341,22 +341,22 @@    </div>  {% endif %} -{% if release.doi or release.pmid or release.pmcid or release.wikidata_qid %} +{% if release.ext_ids.doi or release.ext_ids.pmid or release.ext_ids.pmcid or release.ext_ids.wikidata_qid %}  <div class="ui segment attached" style="word-wrap: break-word;"> -{% if release.doi %} -  <b>DOI </b>  <a href="https://doi.org/{{ release.doi }}" title="{{ release.doi }}" itemprop="sameAs">{{ release.doi }}</a><br> +{% if release.ext_ids.doi %} +  <b>DOI </b>  <a href="https://doi.org/{{ release.ext_ids.doi }}" title="{{ release.ext_ids.doi }}" itemprop="sameAs">{{ release.ext_ids.doi }}</a><br>  {% endif %} -{% if release.pmid != None %} -  <b>PubMed</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.pmid }}"> <code>{{ release.pmid }}</code></a><br> +{% if release.ext_ids.pmid != None %} +  <b>PubMed</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.ext_ids.pmid }}"> <code>{{ release.ext_ids.pmid }}</code></a><br>  {% endif %} -{% if release.pmcid != None %} -  <b>PMC</b> <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/{{ release.pmcid }}"> <code>{{ release.pmcid }}</code></a><br> +{% if release.ext_ids.pmcid != None %} +  <b>PMC</b> <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/{{ release.ext_ids.pmcid }}"> <code>{{ release.ext_ids.pmcid }}</code></a><br>  {% endif %} -{% if release.wikidata_qid != None %} -  <b>Wikidata</b> <a href="https://www.wikidata.org/wiki/{{ release.wikidata_qid }}"> <code>{{ release.wikidata_qid }}</code></a><br> +{% if release.ext_ids.wikidata_qid != None %} +  <b>Wikidata</b> <a href="https://www.wikidata.org/wiki/{{ release.ext_ids.wikidata_qid }}"> <code>{{ release.ext_ids.wikidata_qid }}</code></a><br>  {% endif %} -{% if release.isbn13 != None %} -  <b>ISBN-13 </b>  <code>{{ release.isbn13 }}</code> +{% if release.ext_ids.isbn13 != None %} +  <b>ISBN-13 </b>  <code>{{ release.ext_ids.isbn13 }}</code>  {% endif %}  </div>  {% endif %} @@ -429,17 +429,17 @@    {% if container != None and container.issnl != None %}      <a href="http://www.sherpa.ac.uk/romeo/issn/{{ container.issnl }}/">SHERPA/RoMEO</a> (journal policies)<br/>    {% endif %} -  {% if container != None and container.doi != None %} -    <a href="https://oadoi.org/{{ release.doi }}">oaDOI/unpaywall</a><br/> +  {% if release != None and release.ext_ids.doi != None %} +    <a href="https://oadoi.org/{{ release.ext_ids.doi }}">oaDOI/unpaywall</a><br/>    {% endif %} -  {% if release.isbn13 != None %} -    <a href="https://openlibrary.org/search?isbn={{ release.isbn13 }}">Open Library</a><br> -    <a href="http://www.worldcat.org/search?q=bn%3A{{ release.isbn13 }}">Worldcat</a><br> +  {% if release.ext_ids.isbn13 != None %} +    <a href="https://openlibrary.org/search?isbn={{ release.ext_ids.isbn13 }}">Open Library</a><br> +    <a href="http://www.worldcat.org/search?q=bn%3A{{ release.ext_ids.isbn13 }}">Worldcat</a><br>    {% else %}      <a href="http://www.worldcat.org/search?qt=worldcat_org_art&q={{ release.title|urlencode }}">Worldcat</a><br>    {% endif %} -  {% if release.doi %} -  <a href="https://api.crossref.org/v1/works/http://dx.doi.org/{{ release.doi }}">Crossref Metadata</a> (via API)<br> +  {% if release.ext_ids.doi %} +  <a href="https://api.crossref.org/v1/works/http://dx.doi.org/{{ release.ext_ids.doi }}">Crossref Metadata</a> (via API)<br>    {% endif %}    <a href="https://www.wikidata.org/w/index.php?search={{ release.title|urlencode  }}">wikidata.org</a><br>    <a href="https://core.ac.uk/search?q={{ release.title|urlencode  }}">CORE.ac.uk</a><br> | 
