From 376763ce13c86d78f5e30c5660bb3b767a0c44fc Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 25 Jul 2018 17:24:49 -0700 Subject: webface updates for newer schema (and abstracts) --- python/fatcat/crossref_importer.py | 4 +- python/fatcat/manifest_importer.py | 17 ++++++- python/fatcat/templates/container_view.html | 3 ++ python/fatcat/templates/creator_view.html | 3 ++ python/fatcat/templates/file_view.html | 9 ++-- python/fatcat/templates/release_view.html | 74 ++++++++++++++++++----------- python/tests/manifest.py | 34 +++++++++++++ 7 files changed, 109 insertions(+), 35 deletions(-) create mode 100644 python/tests/manifest.py diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py index 114520ed..ca31d241 100644 --- a/python/fatcat/crossref_importer.py +++ b/python/fatcat/crossref_importer.py @@ -44,7 +44,7 @@ class FatcatCrossrefImporter(FatcatImporter): contribs.append(fatcat_client.ReleaseContrib( creator_id=creator_id, index=i+1, - raw=raw_name, + raw_name=raw_name, role="author")) # container @@ -82,7 +82,7 @@ class FatcatCrossrefImporter(FatcatImporter): title=rm.get('title'), locator=rm.get('first-page'), # TODO: just dump JSON somewhere here? - raw=rm.get('unstructured'))) + extra=rm.get('unstructured'))) # release extra = dict(crossref={ diff --git a/python/fatcat/manifest_importer.py b/python/fatcat/manifest_importer.py index 47ebb020..7762d132 100644 --- a/python/fatcat/manifest_importer.py +++ b/python/fatcat/manifest_importer.py @@ -25,13 +25,27 @@ class FatcatManifestImporter(FatcatImporter): release_id = self.lookup_doi(doi.lower()) if release_id: release_ids = [release_id,] + if datetime is None: + datetime = "1" + urls = [] + if "//archive.org/" in url or "//arxiv.org/" in url: + # TODO: special-case the arxiv.org bulk mirror? 
+ urls.append(fatcat_client.FileEntityUrls(url=url, rel="repository")) + elif "//web.archive.org/" in url or "//archive.is/" in url: + urls.append(fatcat_client.FileEntityUrls(url=url, rel="webarchive")) + else: + urls.append(fatcat_client.FileEntityUrls(url=url, rel="web")) + urls.append(fatcat_client.FileEntityUrls( + url="https://web.archive.org/web/{}/{}".format(datetime, url), + rel="webarchive")) + extra = None fe = fatcat_client.FileEntity( sha1=sha1, mimetype=mimetype, size=size_bytes, md5=md5, - url=url, + urls=urls, releases=release_ids, extra=extra) return fe @@ -44,7 +58,6 @@ class FatcatManifestImporter(FatcatImporter): def process_db(self, db_path, size=100): # TODO: multiple DOIs per sha1 # TODO: multiple URLs per sha1 (with schema change) - # TODO: a test! db = sqlite3.connect(db_path) last_sha1 = None diff --git a/python/fatcat/templates/container_view.html b/python/fatcat/templates/container_view.html index bcdbfc95..865cbaeb 100644 --- a/python/fatcat/templates/container_view.html +++ b/python/fatcat/templates/container_view.html @@ -24,6 +24,9 @@ {% if (container.extra != None) and (container.extra['url'] != None) and (container.extra['url']|length > 0) %}
Homepage:  {{ container.extra['url'] }} {% endif %} +{% if container.wikidata_qid != None %} +
Wikidata Entity:  {{ container.wikidata_qid }} +{% endif %} {% if container.extra != None %}

Extra Metadata (raw JSON)

diff --git a/python/fatcat/templates/creator_view.html b/python/fatcat/templates/creator_view.html index 3047a909..25a4f61a 100644 --- a/python/fatcat/templates/creator_view.html +++ b/python/fatcat/templates/creator_view.html @@ -18,6 +18,9 @@

Sur ("family"/"last") name: {% if creator.surname != None %}{{ creator.surname }}{% else %}None or unknown{% endif %} +{% if creator.wikidata_qid != None %} +
Wikidata Entity:  {{ creator.wikidata_qid }} +{% endif %} {% if creator.extra != None %}

Extra Metadata (raw JSON)

{% for (key, value) in creator.extra.items() %} diff --git a/python/fatcat/templates/file_view.html b/python/fatcat/templates/file_view.html index 54a6c18a..2934224d 100644 --- a/python/fatcat/templates/file_view.html +++ b/python/fatcat/templates/file_view.html @@ -36,8 +36,9 @@ This file is not associated with any fatcat release. {% if file.url != None %}

Known locations of this file:

{% else %} No known public URL, mirror, or archive for this file. @@ -71,8 +72,8 @@ Raw Object:
-{% if file.url != None %} -Download File +{% if file.urls != None and file.urls != [] %} +Download File {% else %} No Download Available {% endif %} diff --git a/python/fatcat/templates/release_view.html b/python/fatcat/templates/release_view.html index b3fe0aa6..d60df826 100644 --- a/python/fatcat/templates/release_view.html +++ b/python/fatcat/templates/release_view.html @@ -10,9 +10,9 @@ {% if authors != [] %} by {% endif %} {% for contrib in authors %} {% if contrib.creator_id %} - {{ contrib.raw }}{% if not loop.last %}, {% endif %} + {{ contrib.raw_name }}{% if not loop.last %}, {% endif %} {% else %} - {% if contrib.raw != None %}{{ contrib.raw }}{% else %}Unknown{% endif %}{% if not loop.last %}, {% endif %} + {% if contrib.raw_name != None %}{{ contrib.raw_name }}{% else %}Unknown{% endif %}{% if not loop.last %}, {% endif %} {% endif %} {% endfor %}
@@ -26,6 +26,15 @@ {% if release.doi != None %}
DOI:  {{ release.doi }} {% endif %} +{% if release.pmid != None %} +
PubMed:  {{ release.pmid }} +{% endif %} +{% if release.pmcid != None %} +
PubMed Central:  {{ release.pmcid }} +{% endif %} +{% if release.wikidata_qid != None %} +
Wikidata Entity:  {{ release.wikidata_qid }} +{% endif %} {% if release.isbn13 != None %}
ISBN-13:  {{ release.isbn13 }} {% endif %} @@ -70,30 +79,13 @@ Raw Object: -->
-

Files

-{% if files != [] %} -Believed to represent this release... - - - - - {% for file in files %} - -
SHA-1 - Size (bytes) - File Type - Links -
{{ file.sha1}} - {% if file.size != None %}{{ file.size }}{% endif %} - {% if file.mimetype != None %}{{ file.mimetype }}{% endif %} - {{ file.url.split('/')[2] }} -
web.archive.org +

Abstracts

+{% if release.abstracts != [] %} + {% for abstract in release.abstracts %} + Abstract ({{ abstract.sha1 }}, {{ abstract.mimetype }}): {{ abstract.content }} {% endfor %} -
- {% else %} -

There are no known files associated with this release (you could try -other releases for this work?). +

No known abstracts. {% endif %}
@@ -109,9 +101,9 @@ Believed to represent this release... {% for contrib in release.contribs %} {{ contrib.index }} {% if contrib.creator_id %} - {{ contrib.raw }} + {{ contrib.raw_name }} {% else %} - {{ contrib.raw }} + {{ contrib.raw_name }} {% endif %} {{ contrib.role or '' }} {% endfor %} @@ -121,13 +113,41 @@ Believed to represent this release...

Contributors (authors, etc) not known. {% endif %} +
+

Files

+{% if files != [] %} +Believed to represent this release... + + + + + {% for file in files %} + +
SHA-1 + Size (bytes) + File Type + Links +
{{ file.sha1}} + {% if file.size != None %}{{ file.size }}{% endif %} + {% if file.mimetype != None %}{{ file.mimetype }}{% endif %} + {% for url in file.urls %} + {{ url.url.split('/')[2] }} ({{ url.rel }})
+ {% endfor %} + {% endfor %} +
+ +{% else %} +

There are no known files associated with this release (you could try +other releases for this work?). +{% endif %} +
{% if release.refs.size != 0 %}

References

This release citing other releases.
    {% for ref in release.refs %} -
  1. {% if ref.raw != None %}{{ ref.raw }}{% else %}unknown{% endif %} +
  2. {% if ref.extra != None %}{{ ref.extra }}{% else %}unknown{% endif %} {% if ref.target_release_id != None %} (fatcat release) {% endif %} diff --git a/python/tests/manifest.py b/python/tests/manifest.py new file mode 100644 index 00000000..030d9e48 --- /dev/null +++ b/python/tests/manifest.py @@ -0,0 +1,34 @@ + +import json +import pytest +from fatcat.manifest_importer import FatcatManifestImporter + + +@pytest.fixture(scope="function") +def manifest_importer(): + yield FatcatManifestImporter("http://localhost:9411/v0") + +# TODO: use API to check that entities actually created... +#def test_manifest_importer_batch(manifest_importer): +# with open('tests/files/0000-0001-8254-7103.json', 'r') as f: +# manifest_importer.process_batch(f) + +#def test_manifest_importer(manifest_importer): +# with open('tests/files/0000-0001-8254-7103.json', 'r') as f: +# manifest_importer.process_source(f) + +def test_manifest_row_parse(manifest_importer): + # (sha1, mimetype, size_bytes, md5, doi, url, datetime) = row + + c = manifest_importer.parse_manifest_row( + (None, None, None, None, None, None, None)) + assert c == None + + c = manifest_importer.parse_manifest_row( + ("7d97e98f8af710c7e7fe703abc8f639e0ee507c4", "application/pdf", "12345", "8af710c7e7fe703abc8f639e0ee507c4", "10.1234/asdf", "https://example.com/thing.pdf", "200001010000")) + assert c.sha1 == "7d97e98f8af710c7e7fe703abc8f639e0ee507c4" + assert c.mimetype == "application/pdf" + assert c.urls[0].url == "https://example.com/thing.pdf" + assert c.urls[0].rel == "web" + assert c.urls[1].url == "https://web.archive.org/web/200001010000/https://example.com/thing.pdf" + assert c.urls[1].rel == "webarchive" -- cgit v1.2.3