summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/matched.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2018-11-15 13:11:52 -0800
committerBryan Newbold <bnewbold@robocracy.org>2018-11-15 13:15:15 -0800
commitbb28a3fc1cc900f2dde31e1dbc492d9661034f41 (patch)
treef037dd3d1bab6cbf08a562dbdd4c09361fe0c030 /python/fatcat_tools/importers/matched.py
parent9f817c6c70a749f2ac449ab4edfd26c6dd8a7410 (diff)
downloadfatcat-bb28a3fc1cc900f2dde31e1dbc492d9661034f41.tar.gz
fatcat-bb28a3fc1cc900f2dde31e1dbc492d9661034f41.zip
large refactor of python names/paths
- Add __init__.py files for fatcat_tools submodules, and use them in imports - Add a bunch of comments to files. - rename a number of classes and functions to be less verbose
Diffstat (limited to 'python/fatcat_tools/importers/matched.py')
-rw-r--r--python/fatcat_tools/importers/matched.py10
1 files changed, 8 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 774019c7..732fccbe 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -4,7 +4,7 @@ import json
import sqlite3
import itertools
import fatcat_client
-from fatcat_tools.importers.common import FatcatImporter
+from .common import FatcatImporter
#row = row.split('\t')
#assert len(row) == 2
@@ -13,8 +13,14 @@ from fatcat_tools.importers.common import FatcatImporter
#print(sha1)
#dois = [d.lower() for d in json.loads(row[1])]
-class FatcatMatchedImporter(FatcatImporter):
+class MatchedImporter(FatcatImporter):
"""
+ Importer for "file to crossref DOI" matches.
+
+ These matches are currently generated by Internet Archive hadoop jobs
+ written in scala (part of the 'sandcrawler' repo/project), but could be
+ generated by other parties as well.
+
Input format is JSON with keys:
- dois (list)
- sha1 (hex)