aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat/manifest_importer.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-07-25 17:24:49 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2018-07-25 17:24:49 -0700
commit 376763ce13c86d78f5e30c5660bb3b767a0c44fc (patch)
tree 8f31e97202ff21cc7e51f71cdb314d9e94a3233a /python/fatcat/manifest_importer.py
parent 829765848a57190a7bb7fd4ff985f49d61055e97 (diff)
download fatcat-376763ce13c86d78f5e30c5660bb3b767a0c44fc.tar.gz
fatcat-376763ce13c86d78f5e30c5660bb3b767a0c44fc.zip
webface updates for newer schema (and abstracts)
Diffstat (limited to 'python/fatcat/manifest_importer.py')
-rw-r--r-- python/fatcat/manifest_importer.py 17
1 files changed, 15 insertions, 2 deletions
diff --git a/python/fatcat/manifest_importer.py b/python/fatcat/manifest_importer.py
index 47ebb020..7762d132 100644
--- a/python/fatcat/manifest_importer.py
+++ b/python/fatcat/manifest_importer.py
@@ -25,13 +25,27 @@ class FatcatManifestImporter(FatcatImporter):
release_id = self.lookup_doi(doi.lower())
if release_id:
release_ids = [release_id,]
+ if datetime is None:
+ datetime = "1"
+ urls = []
+ if "//archive.org/" in url or "//arxiv.org/" in url:
+ # TODO: special-case the arxiv.org bulk mirror?
+ urls.append(fatcat_client.FileEntityUrls(url=url, rel="repository"))
+ elif "//web.archive.org/" in url or "//archive.is/" in url:
+ urls.append(fatcat_client.FileEntityUrls(url=url, rel="webarchive"))
+ else:
+ urls.append(fatcat_client.FileEntityUrls(url=url, rel="web"))
+ urls.append(fatcat_client.FileEntityUrls(
+ url="https://web.archive.org/web/{}/{}".format(datetime, url),
+ rel="webarchive"))
+
extra = None
fe = fatcat_client.FileEntity(
sha1=sha1,
mimetype=mimetype,
size=size_bytes,
md5=md5,
- url=url,
+ urls=urls,
releases=release_ids,
extra=extra)
return fe
@@ -44,7 +58,6 @@ class FatcatManifestImporter(FatcatImporter):
def process_db(self, db_path, size=100):
# TODO: multiple DOIs per sha1
# TODO: multiple URLs per sha1 (with schema change)
- # TODO: a test!
db = sqlite3.connect(db_path)
last_sha1 = None