diff options
Diffstat (limited to 'python/fatcat_tools/importers/matched.py')
-rw-r--r-- | python/fatcat_tools/importers/matched.py | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 774019c7..732fccbe 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -4,7 +4,7 @@ import json
 import sqlite3
 import itertools
 import fatcat_client
-from fatcat_tools.importers.common import FatcatImporter
+from .common import FatcatImporter
 
 #row = row.split('\t')
 #assert len(row) == 2
@@ -13,8 +13,14 @@ from fatcat_tools.importers.common import FatcatImporter
 #print(sha1)
 #dois = [d.lower() for d in json.loads(row[1])]
 
-class FatcatMatchedImporter(FatcatImporter):
+class MatchedImporter(FatcatImporter):
     """
+    Importer for "file to crossref DOI" matches.
+
+    These matches are currently generated by Internet Archive hadoop jobs
+    written in scala (part of the 'sandcrawler' repo/project), but could be
+    generated by other parties as well.
+
     Input format is JSON with keys:
     - dois (list)
     - sha1 (hex)