summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/fatcat/crossref_importer.py 4
-rw-r--r-- python/fatcat/manifest_importer.py 17
-rw-r--r-- python/fatcat/templates/container_view.html 3
-rw-r--r-- python/fatcat/templates/creator_view.html 3
-rw-r--r-- python/fatcat/templates/file_view.html 9
-rw-r--r-- python/fatcat/templates/release_view.html 74
-rw-r--r-- python/tests/manifest.py 34
7 files changed, 109 insertions, 35 deletions
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index 114520ed..ca31d241 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -44,7 +44,7 @@ class FatcatCrossrefImporter(FatcatImporter):
contribs.append(fatcat_client.ReleaseContrib(
creator_id=creator_id,
index=i+1,
- raw=raw_name,
+ raw_name=raw_name,
role="author"))
# container
@@ -82,7 +82,7 @@ class FatcatCrossrefImporter(FatcatImporter):
title=rm.get('title'),
locator=rm.get('first-page'),
# TODO: just dump JSON somewhere here?
- raw=rm.get('unstructured')))
+ extra=rm.get('unstructured')))
# release
extra = dict(crossref={
diff --git a/python/fatcat/manifest_importer.py b/python/fatcat/manifest_importer.py
index 47ebb020..7762d132 100644
--- a/python/fatcat/manifest_importer.py
+++ b/python/fatcat/manifest_importer.py
@@ -25,13 +25,27 @@ class FatcatManifestImporter(FatcatImporter):
release_id = self.lookup_doi(doi.lower())
if release_id:
release_ids = [release_id,]
+ if datetime is None:
+ datetime = "1"
+ urls = []
+ if "//archive.org/" in url or "//arxiv.org/" in url:
+ # TODO: special-case the arxiv.org bulk mirror?
+ urls.append(fatcat_client.FileEntityUrls(url=url, rel="repository"))
+ elif "//web.archive.org/" in url or "//archive.is/" in url:
+ urls.append(fatcat_client.FileEntityUrls(url=url, rel="webarchive"))
+ else:
+ urls.append(fatcat_client.FileEntityUrls(url=url, rel="web"))
+ urls.append(fatcat_client.FileEntityUrls(
+ url="https://web.archive.org/web/{}/{}".format(datetime, url),
+ rel="webarchive"))
+
extra = None
fe = fatcat_client.FileEntity(
sha1=sha1,
mimetype=mimetype,
size=size_bytes,
md5=md5,
- url=url,
+ urls=urls,
releases=release_ids,
extra=extra)
return fe
@@ -44,7 +58,6 @@ class FatcatManifestImporter(FatcatImporter):
def process_db(self, db_path, size=100):
# TODO: multiple DOIs per sha1
# TODO: multiple URLs per sha1 (with schema change)
- # TODO: a test!
db = sqlite3.connect(db_path)
last_sha1 = None
diff --git a/python/fatcat/templates/container_view.html b/python/fatcat/templates/container_view.html
index bcdbfc95..865cbaeb 100644
--- a/python/fatcat/templates/container_view.html
+++ b/python/fatcat/templates/container_view.html
@@ -24,6 +24,9 @@
{% if (container.extra != None) and (container.extra['url'] != None) and (container.extra['url']|length > 0) %}
<br><b>Homepage:</b> <a href="{{ container.extra['url'] }}">&nbsp;<code>{{ container.extra['url'] }}</code></a>
{% endif %}
+{% if container.wikidata_qid != None %}
+<br><b>Wikidata Entity:</b> <a href="https://www.wikidata.org/wiki/{{ container.wikidata_qid }}">&nbsp;<code>{{ container.wikidata_qid }}</code></a>
+{% endif %}
{% if container.extra != None %}
<h3>Extra Metadata (raw JSON)</h3>
diff --git a/python/fatcat/templates/creator_view.html b/python/fatcat/templates/creator_view.html
index 3047a909..25a4f61a 100644
--- a/python/fatcat/templates/creator_view.html
+++ b/python/fatcat/templates/creator_view.html
@@ -18,6 +18,9 @@
<p><b>Sur ("family"/"last") name:</b>
{% if creator.surname != None %}{{ creator.surname }}{% else %}<i>None or unknown</i>{% endif %}
+{% if creator.wikidata_qid != None %}
+<br><b>Wikidata Entity:</b> <a href="https://www.wikidata.org/wiki/{{ creator.wikidata_qid }}">&nbsp;<code>{{ creator.wikidata_qid }}</code></a>
+{% endif %}
{% if creator.extra != None %}
<h3>Extra Metadata (raw JSON)</h3>
{% for (key, value) in creator.extra.items() %}
diff --git a/python/fatcat/templates/file_view.html b/python/fatcat/templates/file_view.html
index 54a6c18a..2934224d 100644
--- a/python/fatcat/templates/file_view.html
+++ b/python/fatcat/templates/file_view.html
@@ -36,8 +36,9 @@ This file is not associated with any fatcat release.
{% if file.url != None %}
<p>Known locations of this file:
<ul>
- <li><a href="{{ file.url }}">{{ file.url.split('/')[2] }}</a>
- (<a href="https://web.archive.org/web/1/{{ file.url }}">Wayback mirror</a>)
+{% for url in file.urls %}
+ <li><a href="{{ url.url }}">{{ url.url.split('/')[2] }}</a> ({{ url.rel }})<br>
+{% endfor %}
</ul>
{% else %}
No known public URL, mirror, or archive for this file.
@@ -71,8 +72,8 @@ Raw Object:
</div>
<div class="five wide column">
-{% if file.url != None %}
-<a href="https://web.archive.org/web/1/{{ file.url }}" class="ui top attached fluid huge green button"><i class="file icon"></i>Download File</a>
+{% if file.urls != None and file.urls != [] %}
+<a href="https://web.archive.org/web/1/{{ file.urls[0].url }}" class="ui top attached fluid huge green button"><i class="file icon"></i>Download File</a>
{% else %}
<span class="ui top attached fluid huge grey button"><i class="file cross icon"></i>No Download Available</span>
{% endif %}
diff --git a/python/fatcat/templates/release_view.html b/python/fatcat/templates/release_view.html
index b3fe0aa6..d60df826 100644
--- a/python/fatcat/templates/release_view.html
+++ b/python/fatcat/templates/release_view.html
@@ -10,9 +10,9 @@
{% if authors != [] %} by {% endif %}
{% for contrib in authors %}
{% if contrib.creator_id %}
- <b><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw }}</a></b>{% if not loop.last %}, {% endif %}
+ <b><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw_name }}</a></b>{% if not loop.last %}, {% endif %}
{% else %}
- {% if contrib.raw != None %}{{ contrib.raw }}{% else %}<i>Unknown</i>{% endif %}{% if not loop.last %}, {% endif %}
+ {% if contrib.raw_name != None %}{{ contrib.raw_name }}{% else %}<i>Unknown</i>{% endif %}{% if not loop.last %}, {% endif %}
{% endif %}
{% endfor %}
</div>
@@ -26,6 +26,15 @@
{% if release.doi != None %}
<br><b>DOI:</b> <a href="https://doi.org/{{ release.doi }}">&nbsp;<code>{{ release.doi }}</code></a>
{% endif %}
+{% if release.pmid != None %}
+<br><b>PubMed:</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.pmid }}">&nbsp;<code>{{ release.pmid }}</code></a>
+{% endif %}
+{% if release.pmcid != None %}
+<br><b>PubMed Central:</b> <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/{{ release.pmcid }}">&nbsp;<code>{{ release.pmcid }}</code></a>
+{% endif %}
+{% if release.wikidata_qid != None %}
+<br><b>Wikidata Entity:</b> <a href="https://www.wikidata.org/wiki/{{ release.wikidata_qid }}">&nbsp;<code>{{ release.wikidata_qid }}</code></a>
+{% endif %}
{% if release.isbn13 != None %}
<br><b>ISBN-13:</b> &nbsp;<code>{{ release.isbn13 }}</code>
{% endif %}
@@ -70,30 +79,13 @@ Raw Object:
-->
<br>
-<h3>Files</h3>
-{% if files != [] %}
-Believed to represent this release...
-<table class="ui compact table">
- <thead>
- <tr><th>SHA-1
- <th>Size (bytes)
- <th>File Type
- <th>Links
- </thead>
- <tbody>
- {% for file in files %}
- <tr><td><small><code><a href="/file/{{ file.ident }}">{{ file.sha1}}</a></code></small>
- <td>{% if file.size != None %}{{ file.size }}{% endif %}
- <td>{% if file.mimetype != None %}{{ file.mimetype }}{% endif %}
- <td><a href="{{ file.url }}">{{ file.url.split('/')[2] }}</a>
- <br><a href="https://web.archive.org/web/1/{{ file.url }}">web.archive.org</a>
+<h3>Abstracts</h3>
+{% if release.abstracts != [] %}
+ {% for abstract in release.abstracts %}
+ <b>Abstract (<code>{{ abstract.sha1 }}</code>, {{ abstract.mimetype }}):</b> {{ abstract.content }}
{% endfor %}
- </tbody>
-</table>
-</ul>
{% else %}
-<p>There are no known files associated with this release (you could try
-<a href="/work/{{ release.work_id }}">other releases for this work?</a>).
+<p>No known abstracts.
{% endif %}
<br>
@@ -109,9 +101,9 @@ Believed to represent this release...
{% for contrib in release.contribs %}
<tr><td>{{ contrib.index }}
{% if contrib.creator_id %}
- <td><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw }}</a>
+ <td><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw_name }}</a>
{% else %}
- <td>{{ contrib.raw }}
+ <td>{{ contrib.raw_name }}
{% endif %}
<td>{{ contrib.role or '' }}
{% endfor %}
@@ -122,12 +114,40 @@ Believed to represent this release...
{% endif %}
<br>
+<h3>Files</h3>
+{% if files != [] %}
+Believed to represent this release...
+<table class="ui compact table">
+ <thead>
+ <tr><th>SHA-1
+ <th>Size (bytes)
+ <th>File Type
+ <th>Links
+ </thead>
+ <tbody>
+ {% for file in files %}
+ <tr><td><small><code><a href="/file/{{ file.ident }}">{{ file.sha1}}</a></code></small>
+ <td>{% if file.size != None %}{{ file.size }}{% endif %}
+ <td>{% if file.mimetype != None %}{{ file.mimetype }}{% endif %}
+ <td>{% for url in file.urls %}
+ <a href="{{ url.url }}">{{ url.url.split('/')[2] }}</a> ({{ url.rel }})<br>
+ {% endfor %}
+ {% endfor %}
+ </tbody>
+</table>
+</ul>
+{% else %}
+<p>There are no known files associated with this release (you could try
+<a href="/work/{{ release.work_id }}">other releases for this work?</a>).
+{% endif %}
+
+<br>
{% if release.refs.size != 0 %}
<h3>References</h3>
This release citing other releases.
<ol>
{% for ref in release.refs %}
- <li>{% if ref.raw != None %}{{ ref.raw }}{% else %}<i>unknown</i>{% endif %}
+ <li>{% if ref.extra != None %}{{ ref.extra }}{% else %}<i>unknown</i>{% endif %}
{% if ref.target_release_id != None %}
(<a href="/release/{{ ref.target_release_id }}">fatcat release</a>)
{% endif %}
diff --git a/python/tests/manifest.py b/python/tests/manifest.py
new file mode 100644
index 00000000..030d9e48
--- /dev/null
+++ b/python/tests/manifest.py
@@ -0,0 +1,34 @@
+
+import json
+import pytest
+from fatcat.manifest_importer import FatcatManifestImporter
+
+
+@pytest.fixture(scope="function")
+def manifest_importer():
+ yield FatcatManifestImporter("http://localhost:9411/v0")
+
+# TODO: use the API to check that the entities were actually created...
+#def test_manifest_importer_batch(manifest_importer):
+# with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+# manifest_importer.process_batch(f)
+
+#def test_manifest_importer(manifest_importer):
+# with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+# manifest_importer.process_source(f)
+
+def test_manifest_row_parse(manifest_importer):
+ # (sha1, mimetype, size_bytes, md5, doi, url, datetime) = row
+
+ c = manifest_importer.parse_manifest_row(
+ (None, None, None, None, None, None, None))
+ assert c == None
+
+ c = manifest_importer.parse_manifest_row(
+ ("7d97e98f8af710c7e7fe703abc8f639e0ee507c4", "application/pdf", "12345", "8af710c7e7fe703abc8f639e0ee507c4", "10.1234/asdf", "https://example.com/thing.pdf", "200001010000"))
+ assert c.sha1 == "7d97e98f8af710c7e7fe703abc8f639e0ee507c4"
+ assert c.mimetype == "application/pdf"
+ assert c.urls[0].url == "https://example.com/thing.pdf"
+ assert c.urls[0].rel == "web"
+ assert c.urls[1].url == "https://web.archive.org/web/200001010000/https://example.com/thing.pdf"
+ assert c.urls[1].rel == "webarchive"