diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-15 13:11:52 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-15 13:15:15 -0800 |
commit | bb28a3fc1cc900f2dde31e1dbc492d9661034f41 (patch) | |
tree | f037dd3d1bab6cbf08a562dbdd4c09361fe0c030 /python/fatcat_tools/importers/matched.py | |
parent | 9f817c6c70a749f2ac449ab4edfd26c6dd8a7410 (diff) | |
download | fatcat-bb28a3fc1cc900f2dde31e1dbc492d9661034f41.tar.gz fatcat-bb28a3fc1cc900f2dde31e1dbc492d9661034f41.zip |
large refactor of python names/paths
- Add `__init__.py` files for fatcat_tools submodules, and use them in
  imports
- Add a bunch of comments to files.
- Rename a number of classes and functions to be less verbose.
Diffstat (limited to 'python/fatcat_tools/importers/matched.py')
-rw-r--r-- | python/fatcat_tools/importers/matched.py | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 774019c7..732fccbe 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -4,7 +4,7 @@ import json
 import sqlite3
 import itertools
 import fatcat_client
-from fatcat_tools.importers.common import FatcatImporter
+from .common import FatcatImporter
 
 #row = row.split('\t')
 #assert len(row) == 2
@@ -13,8 +13,14 @@ from fatcat_tools.importers.common import FatcatImporter
 #print(sha1)
 #dois = [d.lower() for d in json.loads(row[1])]
 
-class FatcatMatchedImporter(FatcatImporter):
+class MatchedImporter(FatcatImporter):
     """
+    Importer for "file to crossref DOI" matches.
+
+    These matches are currently generated by Internet Archive hadoop jobs
+    written in scala (part of the 'sandcrawler' repo/project), but could be
+    generated by other parties as well.
+
     Input format is JSON with keys:
     - dois (list)
     - sha1 (hex)
```