diff options
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 12 | ||||
-rw-r--r-- | python/fatcat_tools/workers/elasticsearch.py | 15 | ||||
-rw-r--r-- | python/fatcat_web/templates/container_view.html | 2 | ||||
-rw-r--r-- | python/fatcat_web/templates/entity_macros.html | 2 | ||||
-rw-r--r-- | python/fatcat_web/templates/release_view.html | 12 | ||||
-rw-r--r-- | python/tests/files/datacite/datacite_doc_31.json | 53 | ||||
-rw-r--r-- | python/tests/files/datacite/datacite_doc_32.json | 53 | ||||
-rw-r--r-- | python/tests/files/datacite/datacite_result_31.json | 24 | ||||
-rw-r--r-- | python/tests/files/datacite/datacite_result_32.json | 23 | ||||
-rw-r--r-- | python/tests/import_datacite.py | 2 |
10 files changed, 187 insertions, 11 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 81f00876..d998f266 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -496,10 +496,12 @@ class DataciteImporter(EntityImporter): if not desc.get('descriptionType') == 'Abstract': continue - # Description maybe a string or list. + # Description maybe a string, int or list. text = desc.get('description', '') if not text: continue + if isinstance(text, int): + text = '{}'.format(text) if isinstance(text, list): try: text = "\n".join(text) @@ -758,6 +760,14 @@ class DataciteImporter(EntityImporter): given_name = clean(given_name) if surname: surname = clean(surname) + + # Perform a final assertion that name does not reduce to zero + # (e.g. whitespace only name). + if name: + name = name.strip() + if not name: + continue + if raw_affiliation == '': continue diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py index 525f372b..e58b3da1 100644 --- a/python/fatcat_tools/workers/elasticsearch.py +++ b/python/fatcat_tools/workers/elasticsearch.py @@ -19,7 +19,7 @@ class ElasticsearchReleaseWorker(FatcatWorker): def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None, elasticsearch_backend="http://localhost:9200", elasticsearch_index="fatcat", - batch_size=200): + batch_size=200, api_host="https://api.fatcat.wiki/v0"): super().__init__(kafka_hosts=kafka_hosts, consume_topic=consume_topic) self.consumer_group = "elasticsearch-updates3" @@ -30,9 +30,11 @@ class ElasticsearchReleaseWorker(FatcatWorker): self.entity_type = ReleaseEntity self.elasticsearch_document_name = "release" self.transform_func = release_to_elasticsearch + self.api_host = api_host def run(self): ac = ApiClient() + api = public_api(self.api_host) def fail_fast(err, partitions): if err is not None: @@ -103,13 +105,20 @@ class ElasticsearchReleaseWorker(FatcatWorker): if entity_dict.get('name') and not entity_dict.get('title'): continue entity = entity_from_json(json_str, self.entity_type, api_client=ac) + if self.elasticsearch_document_name == "changelog": + key = entity.index + # might need to fetch from API + if not (entity.editgroup and entity.editgroup.editor): + entity = api.get_changelog_entry(entity.index) + else: + key = entity.ident # TODO: handle deletions from index bulk_actions.append(json.dumps({ - "index": { "_id": entity.ident, }, + "index": { "_id": key, }, })) bulk_actions.append(json.dumps( self.transform_func(entity))) - print("Upserting, eg, {} (of {} releases in elasticsearch)".format(entity.ident, len(batch))) + print("Upserting, eg, {} (of {} {} in elasticsearch)".format(key, len(batch), self.elasticsearch_document_name)) elasticsearch_endpoint = "{}/{}/{}/_bulk".format( self.elasticsearch_backend, self.elasticsearch_index, diff --git a/python/fatcat_web/templates/container_view.html b/python/fatcat_web/templates/container_view.html index 8d7c4cae..cf444956 100644 --- a/python/fatcat_web/templates/container_view.html +++ b/python/fatcat_web/templates/container_view.html @@ -23,6 +23,7 @@ Published by {{ container.publisher }} {% endif %} +{% if container.state == "active" %} <h3>Search Releases from this Container</h3> <form class="" role="search" action="/release/search" method="get"> <div class="ui form"> @@ -33,6 +34,7 @@ </div> </div> </form> +{% endif %} {% if container._random_releases %} <h3>Example Publications</h3> diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html index 61de2d13..c22eb106 100644 --- a/python/fatcat_web/templates/entity_macros.html +++ b/python/fatcat_web/templates/entity_macros.html @@ -54,10 +54,12 @@ </a> </div> +{% if entity.ident %} <div class="two ui buttons bottom attached"> <a href="{% if editgroup %}/editgroup/{{ editgroup.editgroup_id }}{% endif %}/{{ entity_type }}/{{ entity.ident }}/edit" class="ui blue button">Edit Metadata</a> <a href="/{{ entity_type }}/{{ entity.ident }}/history" class="ui button">View History</a> </div> +{% endif %} {%- endmacro %} diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index d7c4e76e..60e4624e 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -3,9 +3,9 @@ {% import "entity_macros.html" as entity_macros %} {% extends "entity_base.html" %} -{# HTML metadata embeddings #} -{% if release and release.status == "active" %} - {% block extra_head %} +{% block extra_head %} + {% if release and release.state == "active" %} + {# HTML metadata embeddings #} <link rel="canonical" href="https://{{ config.FATCAT_DOMAIN }}/release/{{ entity.ident }}"> <meta name="twitter:card" content="summary"> @@ -70,8 +70,8 @@ <meta name="citation_pdf_url" content="{{ url.url }}"> {% endif %} {% endfor %}{% endfor %} - {% endblock %} -{% endif %} + {% endif %} +{% endblock %} {% block main_extra_attr %}itemscope itemtype="http://schema.org/ScholarlyArticle" itemid="#release"{% endblock %} @@ -388,7 +388,7 @@ accessible version. <br>grouping other versions (eg, pre-print) and variants of this release </div> -{% if release._can_citeproc %} +{% if release.state == "active" and release._can_citeproc %} <div class="ui segment attached accordion"> <div class="title" style="padding: 0px;"> <i class="dropdown icon"></i><b>Cite This Release</b> diff --git a/python/tests/files/datacite/datacite_doc_31.json b/python/tests/files/datacite/datacite_doc_31.json new file mode 100644 index 00000000..83af3e4d --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_31.json @@ -0,0 +1,53 @@ +{ + "id": "10.17912/micropub.biology.000143", + "type": "dois", + "attributes": { + "doi": "10.17912/micropub.biology.000143", + "identifiers": null, + "creators": [ + { + "raw_name": " ", + "givenName": "", + "familyName": "", + "affiliation": [], + "role": "author" + } + ], + "titles": [ + { + "title": "Sample" + } + ], + "publisher": "microPublication Biology", + "publicationYear": 2019, + "types": { + "resourceTypeGeneral": "DataPaper" + }, + "relatedIdentifiers": [], + "sizes": [], + "formats": [], + "version": null, + "rightsList": [], + "descriptions": [ + { + "description": "Biological liquid-liquid phase separation", + "descriptionType": "Abstract" + } + ], + "geoLocations": [], + "fundingReferences": [], + "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143", + "created": "2019-08-19T14:43:08.000Z", + "registered": "2019-08-19T14:43:09.000Z", + "published": "2019", + "updated": "2019-11-09T12:32:02.000Z" + }, + "relationships": { + "client": { + "data": { + "id": "caltech.micropub", + "type": "clients" + } + } + } +} diff --git a/python/tests/files/datacite/datacite_doc_32.json b/python/tests/files/datacite/datacite_doc_32.json new file mode 100644 index 00000000..7ea7e873 --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_32.json @@ -0,0 +1,53 @@ +{ + "id": "10.17912/micropub.biology.000143", + "type": "dois", + "attributes": { + "doi": "10.17912/micropub.biology.000143", + "identifiers": null, + "creators": [ + { + "raw_name": " ", + "givenName": "", + "familyName": "", + "affiliation": [], + "role": "author" + } + ], + "titles": [ + { + "title": "Sample" + } + ], + "publisher": "microPublication Biology", + "publicationYear": 2019, + "types": { + "resourceTypeGeneral": "DataPaper" + }, + "relatedIdentifiers": [], + "sizes": [], + "formats": [], + "version": null, + "rightsList": [], + "descriptions": [ + { + "description": 1234567890, + "descriptionType": "Abstract" + } + ], + "geoLocations": [], + "fundingReferences": [], + "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143", + "created": "2019-08-19T14:43:08.000Z", + "registered": "2019-08-19T14:43:09.000Z", + "published": "2019", + "updated": "2019-11-09T12:32:02.000Z" + }, + "relationships": { + "client": { + "data": { + "id": "caltech.micropub", + "type": "clients" + } + } + } +} diff --git a/python/tests/files/datacite/datacite_result_31.json b/python/tests/files/datacite/datacite_result_31.json new file mode 100644 index 00000000..193104b0 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_31.json @@ -0,0 +1,24 @@ +{ + "abstracts": [ + { + "content": "Biological liquid-liquid phase separation", + "lang": "fr", + "mimetype": "text/plain" + } + ], + "contribs": [], + "ext_ids": { + "doi": "10.17912/micropub.biology.000143" + }, + "extra": { + "datacite": { + "resourceTypeGeneral": "DataPaper" + }, + "container_name": "microPublication Biology" + }, + "refs": [], + "release_stage": "published", + "release_year": 2019, + "publisher": "microPublication Biology", + "title": "Sample" +} diff --git a/python/tests/files/datacite/datacite_result_32.json b/python/tests/files/datacite/datacite_result_32.json new file mode 100644 index 00000000..1a84a043 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_32.json @@ -0,0 +1,23 @@ +{ + "abstracts": [ + { + "content": "1234567890", + "mimetype": "text/plain" + } + ], + "contribs": [], + "ext_ids": { + "doi": "10.17912/micropub.biology.000143" + }, + "extra": { + "datacite": { + "resourceTypeGeneral": "DataPaper" + }, + "container_name": "microPublication Biology" + }, + "refs": [], + "release_stage": "published", + "release_year": 2019, + "publisher": "microPublication Biology", + "title": "Sample" +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 15650375..c9210ea4 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer): for now. """ datacite_importer.debug = True - for i in range(31): + for i in range(33): src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i) dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i) with open(src, 'r') as f: |