about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 12
-rw-r--r-- python/fatcat_tools/workers/elasticsearch.py 15
-rw-r--r-- python/fatcat_web/templates/container_view.html 2
-rw-r--r-- python/fatcat_web/templates/entity_macros.html 2
-rw-r--r-- python/fatcat_web/templates/release_view.html 12
-rw-r--r-- python/tests/files/datacite/datacite_doc_31.json 53
-rw-r--r-- python/tests/files/datacite/datacite_doc_32.json 53
-rw-r--r-- python/tests/files/datacite/datacite_result_31.json 24
-rw-r--r-- python/tests/files/datacite/datacite_result_32.json 23
-rw-r--r-- python/tests/import_datacite.py 2
10 files changed, 187 insertions, 11 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 81f00876..d998f266 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -496,10 +496,12 @@ class DataciteImporter(EntityImporter):
if not desc.get('descriptionType') == 'Abstract':
continue
- # Description maybe a string or list.
+ # Description may be a string, int or list.
text = desc.get('description', '')
if not text:
continue
+ if isinstance(text, int):
+ text = '{}'.format(text)
if isinstance(text, list):
try:
text = "\n".join(text)
@@ -758,6 +760,14 @@ class DataciteImporter(EntityImporter):
given_name = clean(given_name)
if surname:
surname = clean(surname)
+
+ # Final check that the name does not reduce to an empty string
+ # (e.g. a whitespace-only name); such entries are skipped.
+ if name:
+ name = name.strip()
+ if not name:
+ continue
+
if raw_affiliation == '':
continue
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index 525f372b..e58b3da1 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -19,7 +19,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):
def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None,
elasticsearch_backend="http://localhost:9200", elasticsearch_index="fatcat",
- batch_size=200):
+ batch_size=200, api_host="https://api.fatcat.wiki/v0"):
super().__init__(kafka_hosts=kafka_hosts,
consume_topic=consume_topic)
self.consumer_group = "elasticsearch-updates3"
@@ -30,9 +30,11 @@ class ElasticsearchReleaseWorker(FatcatWorker):
self.entity_type = ReleaseEntity
self.elasticsearch_document_name = "release"
self.transform_func = release_to_elasticsearch
+ self.api_host = api_host
def run(self):
ac = ApiClient()
+ api = public_api(self.api_host)
def fail_fast(err, partitions):
if err is not None:
@@ -103,13 +105,20 @@ class ElasticsearchReleaseWorker(FatcatWorker):
if entity_dict.get('name') and not entity_dict.get('title'):
continue
entity = entity_from_json(json_str, self.entity_type, api_client=ac)
+ if self.elasticsearch_document_name == "changelog":
+ key = entity.index
+ # editgroup/editor missing from the entity: fetch the full changelog entry from the API
+ if not (entity.editgroup and entity.editgroup.editor):
+ entity = api.get_changelog_entry(entity.index)
+ else:
+ key = entity.ident
# TODO: handle deletions from index
bulk_actions.append(json.dumps({
- "index": { "_id": entity.ident, },
+ "index": { "_id": key, },
}))
bulk_actions.append(json.dumps(
self.transform_func(entity)))
- print("Upserting, eg, {} (of {} releases in elasticsearch)".format(entity.ident, len(batch)))
+ print("Upserting, eg, {} (of {} {} in elasticsearch)".format(key, len(batch), self.elasticsearch_document_name))
elasticsearch_endpoint = "{}/{}/{}/_bulk".format(
self.elasticsearch_backend,
self.elasticsearch_index,
diff --git a/python/fatcat_web/templates/container_view.html b/python/fatcat_web/templates/container_view.html
index 8d7c4cae..cf444956 100644
--- a/python/fatcat_web/templates/container_view.html
+++ b/python/fatcat_web/templates/container_view.html
@@ -23,6 +23,7 @@
Published by {{ container.publisher }}
{% endif %}
+{% if container.state == "active" %}
<h3>Search Releases from this Container</h3>
<form class="" role="search" action="/release/search" method="get">
<div class="ui form">
@@ -33,6 +34,7 @@
</div>
</div>
</form>
+{% endif %}
{% if container._random_releases %}
<h3>Example Publications</h3>
diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html
index 61de2d13..c22eb106 100644
--- a/python/fatcat_web/templates/entity_macros.html
+++ b/python/fatcat_web/templates/entity_macros.html
@@ -54,10 +54,12 @@
</a>
</div>
+{% if entity.ident %}
<div class="two ui buttons bottom attached">
<a href="{% if editgroup %}/editgroup/{{ editgroup.editgroup_id }}{% endif %}/{{ entity_type }}/{{ entity.ident }}/edit" class="ui blue button">Edit Metadata</a>
<a href="/{{ entity_type }}/{{ entity.ident }}/history" class="ui button">View History</a>
</div>
+{% endif %}
{%- endmacro %}
diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html
index d7c4e76e..60e4624e 100644
--- a/python/fatcat_web/templates/release_view.html
+++ b/python/fatcat_web/templates/release_view.html
@@ -3,9 +3,9 @@
{% import "entity_macros.html" as entity_macros %}
{% extends "entity_base.html" %}
-{# HTML metadata embeddings #}
-{% if release and release.status == "active" %}
- {% block extra_head %}
+{% block extra_head %}
+ {% if release and release.state == "active" %}
+ {# HTML metadata embeddings #}
<link rel="canonical" href="https://{{ config.FATCAT_DOMAIN }}/release/{{ entity.ident }}">
<meta name="twitter:card" content="summary">
@@ -70,8 +70,8 @@
<meta name="citation_pdf_url" content="{{ url.url }}">
{% endif %}
{% endfor %}{% endfor %}
- {% endblock %}
-{% endif %}
+ {% endif %}
+{% endblock %}
{% block main_extra_attr %}itemscope itemtype="http://schema.org/ScholarlyArticle" itemid="#release"{% endblock %}
@@ -388,7 +388,7 @@ accessible version.
<br>grouping other versions (eg, pre-print) and variants of this release
</div>
-{% if release._can_citeproc %}
+{% if release.state == "active" and release._can_citeproc %}
<div class="ui segment attached accordion">
<div class="title" style="padding: 0px;">
<i class="dropdown icon"></i><b>Cite This Release</b>
diff --git a/python/tests/files/datacite/datacite_doc_31.json b/python/tests/files/datacite/datacite_doc_31.json
new file mode 100644
index 00000000..83af3e4d
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_31.json
@@ -0,0 +1,53 @@
+{
+ "id": "10.17912/micropub.biology.000143",
+ "type": "dois",
+ "attributes": {
+ "doi": "10.17912/micropub.biology.000143",
+ "identifiers": null,
+ "creators": [
+ {
+ "raw_name": " ",
+ "givenName": "",
+ "familyName": "",
+ "affiliation": [],
+ "role": "author"
+ }
+ ],
+ "titles": [
+ {
+ "title": "Sample"
+ }
+ ],
+ "publisher": "microPublication Biology",
+ "publicationYear": 2019,
+ "types": {
+ "resourceTypeGeneral": "DataPaper"
+ },
+ "relatedIdentifiers": [],
+ "sizes": [],
+ "formats": [],
+ "version": null,
+ "rightsList": [],
+ "descriptions": [
+ {
+ "description": "Biological liquid-liquid phase separation",
+ "descriptionType": "Abstract"
+ }
+ ],
+ "geoLocations": [],
+ "fundingReferences": [],
+ "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143",
+ "created": "2019-08-19T14:43:08.000Z",
+ "registered": "2019-08-19T14:43:09.000Z",
+ "published": "2019",
+ "updated": "2019-11-09T12:32:02.000Z"
+ },
+ "relationships": {
+ "client": {
+ "data": {
+ "id": "caltech.micropub",
+ "type": "clients"
+ }
+ }
+ }
+}
diff --git a/python/tests/files/datacite/datacite_doc_32.json b/python/tests/files/datacite/datacite_doc_32.json
new file mode 100644
index 00000000..7ea7e873
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_32.json
@@ -0,0 +1,53 @@
+{
+ "id": "10.17912/micropub.biology.000143",
+ "type": "dois",
+ "attributes": {
+ "doi": "10.17912/micropub.biology.000143",
+ "identifiers": null,
+ "creators": [
+ {
+ "raw_name": " ",
+ "givenName": "",
+ "familyName": "",
+ "affiliation": [],
+ "role": "author"
+ }
+ ],
+ "titles": [
+ {
+ "title": "Sample"
+ }
+ ],
+ "publisher": "microPublication Biology",
+ "publicationYear": 2019,
+ "types": {
+ "resourceTypeGeneral": "DataPaper"
+ },
+ "relatedIdentifiers": [],
+ "sizes": [],
+ "formats": [],
+ "version": null,
+ "rightsList": [],
+ "descriptions": [
+ {
+ "description": 1234567890,
+ "descriptionType": "Abstract"
+ }
+ ],
+ "geoLocations": [],
+ "fundingReferences": [],
+ "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143",
+ "created": "2019-08-19T14:43:08.000Z",
+ "registered": "2019-08-19T14:43:09.000Z",
+ "published": "2019",
+ "updated": "2019-11-09T12:32:02.000Z"
+ },
+ "relationships": {
+ "client": {
+ "data": {
+ "id": "caltech.micropub",
+ "type": "clients"
+ }
+ }
+ }
+}
diff --git a/python/tests/files/datacite/datacite_result_31.json b/python/tests/files/datacite/datacite_result_31.json
new file mode 100644
index 00000000..193104b0
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_31.json
@@ -0,0 +1,24 @@
+{
+ "abstracts": [
+ {
+ "content": "Biological liquid-liquid phase separation",
+ "lang": "fr",
+ "mimetype": "text/plain"
+ }
+ ],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.17912/micropub.biology.000143"
+ },
+ "extra": {
+ "datacite": {
+ "resourceTypeGeneral": "DataPaper"
+ },
+ "container_name": "microPublication Biology"
+ },
+ "refs": [],
+ "release_stage": "published",
+ "release_year": 2019,
+ "publisher": "microPublication Biology",
+ "title": "Sample"
+}
diff --git a/python/tests/files/datacite/datacite_result_32.json b/python/tests/files/datacite/datacite_result_32.json
new file mode 100644
index 00000000..1a84a043
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_32.json
@@ -0,0 +1,23 @@
+{
+ "abstracts": [
+ {
+ "content": "1234567890",
+ "mimetype": "text/plain"
+ }
+ ],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.17912/micropub.biology.000143"
+ },
+ "extra": {
+ "datacite": {
+ "resourceTypeGeneral": "DataPaper"
+ },
+ "container_name": "microPublication Biology"
+ },
+ "refs": [],
+ "release_stage": "published",
+ "release_year": 2019,
+ "publisher": "microPublication Biology",
+ "title": "Sample"
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 15650375..c9210ea4 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer):
for now.
"""
datacite_importer.debug = True
- for i in range(31):
+ for i in range(33):
src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
with open(src, 'r') as f: