diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-07-25 17:24:49 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-07-25 17:24:49 -0700 |
commit | 376763ce13c86d78f5e30c5660bb3b767a0c44fc (patch) | |
tree | 8f31e97202ff21cc7e51f71cdb314d9e94a3233a /python/fatcat/manifest_importer.py | |
parent | 829765848a57190a7bb7fd4ff985f49d61055e97 (diff) | |
download | fatcat-376763ce13c86d78f5e30c5660bb3b767a0c44fc.tar.gz fatcat-376763ce13c86d78f5e30c5660bb3b767a0c44fc.zip |
webface updates for newer schema (and abstracts)
Diffstat (limited to 'python/fatcat/manifest_importer.py')
-rw-r--r-- | python/fatcat/manifest_importer.py | 17 |
1 files changed, 15 insertions, 2 deletions
diff --git a/python/fatcat/manifest_importer.py b/python/fatcat/manifest_importer.py index 47ebb020..7762d132 100644 --- a/python/fatcat/manifest_importer.py +++ b/python/fatcat/manifest_importer.py @@ -25,13 +25,27 @@ class FatcatManifestImporter(FatcatImporter): release_id = self.lookup_doi(doi.lower()) if release_id: release_ids = [release_id,] + if datetime is None: + datetime = "1" + urls = [] + if "//archive.org/" in url or "//arxiv.org/" in url: + # TODO: special-case the arxiv.org bulk mirror? + urls.append(fatcat_client.FileEntityUrls(url=url, rel="repository")) + elif "//web.archive.org/" in url or "//archive.is/" in url: + urls.append(fatcat_client.FileEntityUrls(url=url, rel="webarchive")) + else: + urls.append(fatcat_client.FileEntityUrls(url=url, rel="web")) + urls.append(fatcat_client.FileEntityUrls( + url="https://web.archive.org/web/{}/{}".format(datetime, url), + rel="webarchive")) + extra = None fe = fatcat_client.FileEntity( sha1=sha1, mimetype=mimetype, size=size_bytes, md5=md5, - url=url, + urls=urls, releases=release_ids, extra=extra) return fe @@ -44,7 +58,6 @@ class FatcatManifestImporter(FatcatImporter): def process_db(self, db_path, size=100): # TODO: multiple DOIs per sha1 # TODO: multiple URLs per sha1 (with schema change) - # TODO: a test! db = sqlite3.connect(db_path) last_sha1 = None |