diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat/crossref_importer.py | 4 | ||||
-rw-r--r-- | python/fatcat/manifest_importer.py | 17 | ||||
-rw-r--r-- | python/fatcat/templates/container_view.html | 3 | ||||
-rw-r--r-- | python/fatcat/templates/creator_view.html | 3 | ||||
-rw-r--r-- | python/fatcat/templates/file_view.html | 9 | ||||
-rw-r--r-- | python/fatcat/templates/release_view.html | 74 | ||||
-rw-r--r-- | python/tests/manifest.py | 34 |
7 files changed, 109 insertions, 35 deletions
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py index 114520ed..ca31d241 100644 --- a/python/fatcat/crossref_importer.py +++ b/python/fatcat/crossref_importer.py @@ -44,7 +44,7 @@ class FatcatCrossrefImporter(FatcatImporter): contribs.append(fatcat_client.ReleaseContrib( creator_id=creator_id, index=i+1, - raw=raw_name, + raw_name=raw_name, role="author")) # container @@ -82,7 +82,7 @@ class FatcatCrossrefImporter(FatcatImporter): title=rm.get('title'), locator=rm.get('first-page'), # TODO: just dump JSON somewhere here? - raw=rm.get('unstructured'))) + extra=rm.get('unstructured'))) # release extra = dict(crossref={ diff --git a/python/fatcat/manifest_importer.py b/python/fatcat/manifest_importer.py index 47ebb020..7762d132 100644 --- a/python/fatcat/manifest_importer.py +++ b/python/fatcat/manifest_importer.py @@ -25,13 +25,27 @@ class FatcatManifestImporter(FatcatImporter): release_id = self.lookup_doi(doi.lower()) if release_id: release_ids = [release_id,] + if datetime is None: + datetime = "1" + urls = [] + if "//archive.org/" in url or "//arxiv.org/" in url: + # TODO: special-case the arxiv.org bulk mirror? + urls.append(fatcat_client.FileEntityUrls(url=url, rel="repository")) + elif "//web.archive.org/" in url or "//archive.is/" in url: + urls.append(fatcat_client.FileEntityUrls(url=url, rel="webarchive")) + else: + urls.append(fatcat_client.FileEntityUrls(url=url, rel="web")) + urls.append(fatcat_client.FileEntityUrls( + url="https://web.archive.org/web/{}/{}".format(datetime, url), + rel="webarchive")) + extra = None fe = fatcat_client.FileEntity( sha1=sha1, mimetype=mimetype, size=size_bytes, md5=md5, - url=url, + urls=urls, releases=release_ids, extra=extra) return fe @@ -44,7 +58,6 @@ class FatcatManifestImporter(FatcatImporter): def process_db(self, db_path, size=100): # TODO: multiple DOIs per sha1 # TODO: multiple URLs per sha1 (with schema change) - # TODO: a test! db = sqlite3.connect(db_path) last_sha1 = None diff --git a/python/fatcat/templates/container_view.html b/python/fatcat/templates/container_view.html index bcdbfc95..865cbaeb 100644 --- a/python/fatcat/templates/container_view.html +++ b/python/fatcat/templates/container_view.html @@ -24,6 +24,9 @@ {% if (container.extra != None) and (container.extra['url'] != None) and (container.extra['url']|length > 0) %} <br><b>Homepage:</b> <a href="{{ container.extra['url'] }}"> <code>{{ container.extra['url'] }}</code></a> {% endif %} +{% if container.wikidata_qid != None %} +<br><b>Wikidata Entity:</b> <a href="https://www.wikidata.org/wiki/{{ container.wikidata_qid }}"> <code>{{ container.wikidata_qid }}</code></a> +{% endif %} {% if container.extra != None %} <h3>Extra Metadata (raw JSON)</h3> diff --git a/python/fatcat/templates/creator_view.html b/python/fatcat/templates/creator_view.html index 3047a909..25a4f61a 100644 --- a/python/fatcat/templates/creator_view.html +++ b/python/fatcat/templates/creator_view.html @@ -18,6 +18,9 @@ <p><b>Sur ("family"/"last") name:</b> {% if creator.surname != None %}{{ creator.surname }}{% else %}<i>None or unknown</i>{% endif %} +{% if creator.wikidata_qid != None %} +<br><b>Wikidata Entity:</b> <a href="https://www.wikidata.org/wiki/{{ creator.wikidata_qid }}"> <code>{{ creator.wikidata_qid }}</code></a> +{% endif %} {% if creator.extra != None %} <h3>Extra Metadata (raw JSON)</h3> {% for (key, value) in creator.extra.items() %} diff --git a/python/fatcat/templates/file_view.html b/python/fatcat/templates/file_view.html index 54a6c18a..2934224d 100644 --- a/python/fatcat/templates/file_view.html +++ b/python/fatcat/templates/file_view.html @@ -36,8 +36,9 @@ This file is not associated with any fatcat release. {% if file.url != None %} <p>Known locations of this file: <ul> - <li><a href="{{ file.url }}">{{ file.url.split('/')[2] }}</a> - (<a href="https://web.archive.org/web/1/{{ file.url }}">Wayback mirror</a>) +{% for url in file.urls %} + <li><a href="{{ url.url }}">{{ url.url.split('/')[2] }}</a> ({{ url.rel }})<br> +{% endfor %} </ul> {% else %} No known public URL, mirror, or archive for this file. @@ -71,8 +72,8 @@ Raw Object: </div> <div class="five wide column"> -{% if file.url != None %} -<a href="https://web.archive.org/web/1/{{ file.url }}" class="ui top attached fluid huge green button"><i class="file icon"></i>Download File</a> +{% if file.urls != None and file.urls != [] %} +<a href="https://web.archive.org/web/1/{{ file.urls[0].url }}" class="ui top attached fluid huge green button"><i class="file icon"></i>Download File</a> {% else %} <span class="ui top attached fluid huge grey button"><i class="file cross icon"></i>No Download Available</span> {% endif %} diff --git a/python/fatcat/templates/release_view.html b/python/fatcat/templates/release_view.html index b3fe0aa6..d60df826 100644 --- a/python/fatcat/templates/release_view.html +++ b/python/fatcat/templates/release_view.html @@ -10,9 +10,9 @@ {% if authors != [] %} by {% endif %} {% for contrib in authors %} {% if contrib.creator_id %} - <b><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw }}</a></b>{% if not loop.last %}, {% endif %} + <b><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw_name }}</a></b>{% if not loop.last %}, {% endif %} {% else %} - {% if contrib.raw != None %}{{ contrib.raw }}{% else %}<i>Unknown</i>{% endif %}{% if not loop.last %}, {% endif %} + {% if contrib.raw_name != None %}{{ contrib.raw_name }}{% else %}<i>Unknown</i>{% endif %}{% if not loop.last %}, {% endif %} {% endif %} {% endfor %} </div> @@ -26,6 +26,15 @@ {% if release.doi != None %} <br><b>DOI:</b> <a href="https://doi.org/{{ release.doi }}"> <code>{{ release.doi }}</code></a> {% endif %} +{% if release.pmid != None %} +<br><b>PubMed:</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.pmid }}"> <code>{{ release.pmid }}</code></a> +{% endif %} +{% if release.pmcid != None %} +<br><b>PubMed Central:</b> <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/{{ release.pmcid }}"> <code>{{ release.pmcid }}</code></a> +{% endif %} +{% if release.wikidata_qid != None %} +<br><b>Wikidata Entity:</b> <a href="https://www.wikidata.org/wiki/{{ release.wikidata_qid }}"> <code>{{ release.wikidata_qid }}</code></a> +{% endif %} {% if release.isbn13 != None %} <br><b>ISBN-13:</b> <code>{{ release.isbn13 }}</code> {% endif %} @@ -70,30 +79,13 @@ Raw Object: --> <br> -<h3>Files</h3> -{% if files != [] %} -Believed to represent this release... -<table class="ui compact table"> - <thead> - <tr><th>SHA-1 - <th>Size (bytes) - <th>File Type - <th>Links - </thead> - <tbody> - {% for file in files %} - <tr><td><small><code><a href="/file/{{ file.ident }}">{{ file.sha1}}</a></code></small> - <td>{% if file.size != None %}{{ file.size }}{% endif %} - <td>{% if file.mimetype != None %}{{ file.mimetype }}{% endif %} - <td><a href="{{ file.url }}">{{ file.url.split('/')[2] }}</a> - <br><a href="https://web.archive.org/web/1/{{ file.url }}">web.archive.org</a> +<h3>Abstracts</h3> +{% if release.abstracts != [] %} + {% for abstract in release.abstracts %} + <b>Abstract (<code>{{ abstract.sha1 }}</code>, {{ abstract.mimetype }}):</b> {{ abstract.content }} {% endfor %} - </tbody> -</table> -</ul> {% else %} -<p>There are no known files associated with this release (you could try -<a href="/work/{{ release.work_id }}">other releases for this work?</a>). +<p>No known abstracts. {% endif %} <br> @@ -109,9 +101,9 @@ Believed to represent this release... {% for contrib in release.contribs %} <tr><td>{{ contrib.index }} {% if contrib.creator_id %} - <td><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw }}</a> + <td><a href="/creator/{{contrib.creator_id}}">{{ contrib.raw_name }}</a> {% else %} - <td>{{ contrib.raw }} + <td>{{ contrib.raw_name }} {% endif %} <td>{{ contrib.role or '' }} {% endfor %} @@ -122,12 +114,40 @@ Believed to represent this release... {% endif %} <br> +<h3>Files</h3> +{% if files != [] %} +Believed to represent this release... +<table class="ui compact table"> + <thead> + <tr><th>SHA-1 + <th>Size (bytes) + <th>File Type + <th>Links + </thead> + <tbody> + {% for file in files %} + <tr><td><small><code><a href="/file/{{ file.ident }}">{{ file.sha1}}</a></code></small> + <td>{% if file.size != None %}{{ file.size }}{% endif %} + <td>{% if file.mimetype != None %}{{ file.mimetype }}{% endif %} + <td>{% for url in file.urls %} + <a href="{{ url.url }}">{{ url.url.split('/')[2] }}</a> ({{ url.rel }})<br> + {% endfor %} + {% endfor %} + </tbody> +</table> +</ul> +{% else %} +<p>There are no known files associated with this release (you could try +<a href="/work/{{ release.work_id }}">other releases for this work?</a>). +{% endif %} + +<br> {% if release.refs.size != 0 %} <h3>References</h3> This release citing other releases. <ol> {% for ref in release.refs %} - <li>{% if ref.raw != None %}{{ ref.raw }}{% else %}<i>unknown</i>{% endif %} + <li>{% if ref.extra != None %}{{ ref.extra }}{% else %}<i>unknown</i>{% endif %} {% if ref.target_release_id != None %} (<a href="/release/{{ ref.target_release_id }}">fatcat release</a>) {% endif %} diff --git a/python/tests/manifest.py b/python/tests/manifest.py new file mode 100644 index 00000000..030d9e48 --- /dev/null +++ b/python/tests/manifest.py @@ -0,0 +1,34 @@ + +import json +import pytest +from fatcat.manifest_importer import FatcatManifestImporter + + +@pytest.fixture(scope="function") +def manifest_importer(): + yield FatcatManifestImporter("http://localhost:9411/v0") + +# TODO: use API to check that entities actually created... +#def test_manifest_importer_batch(manifest_importer): +# with open('tests/files/0000-0001-8254-7103.json', 'r') as f: +# manifest_importer.process_batch(f) + +#def test_manifest_importer(manifest_importer): +# with open('tests/files/0000-0001-8254-7103.json', 'r') as f: +# manifest_importer.process_source(f) + +def test_manifest_row_parse(manifest_importer): + # (sha1, mimetype, size_bytes, md5, doi, url, datetime) = row + + c = manifest_importer.parse_manifest_row( + (None, None, None, None, None, None, None)) + assert c == None + + c = manifest_importer.parse_manifest_row( + ("7d97e98f8af710c7e7fe703abc8f639e0ee507c4", "application/pdf", "12345", "8af710c7e7fe703abc8f639e0ee507c4", "10.1234/asdf", "https://example.com/thing.pdf", "200001010000")) + assert c.sha1 == "7d97e98f8af710c7e7fe703abc8f639e0ee507c4" + assert c.mimetype == "application/pdf" + assert c.urls[0].url == "https://example.com/thing.pdf" + assert c.urls[0].rel == "web" + assert c.urls[1].url == "https://web.archive.org/web/200001010000/https://example.com/thing.pdf" + assert c.urls[1].rel == "webarchive" |