diff options
Diffstat (limited to 'python/fatcat_tools/importers/__init__.py')
-rw-r--r-- | python/fatcat_tools/importers/__init__.py | 19 |
1 files changed, 17 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py index e6f081e5..70f38f5b 100644 --- a/python/fatcat_tools/importers/__init__.py +++ b/python/fatcat_tools/importers/__init__.py @@ -1,7 +1,22 @@ -from .common import FatcatImporter, make_kafka_consumer +""" +To run an import you combine two classes; one each of: + +- RecordSource: somehow iterates over a source of raw records (eg, from a + database, Kafka, files on disk, stdin) and pushes into an entity importer. +- EntityImporter: class that a record iterator pushes raw (unparsed) records + into. The entity importer parses and decides what to do (ignore, update, + insert, etc). There is usually a primary entity type, though related entities + can be created along the way. Maintains API connection and editgroup/batch + state. + +""" + +from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, KafkaJsonPusher, make_kafka_consumer, clean from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP from .grobid_metadata import GrobidMetadataImporter -from .issn import IssnImporter +from .journal_metadata import JournalMetadataImporter from .matched import MatchedImporter from .orcid import OrcidImporter +#from .kafka_source import KafkaSource +#from .file_source import FileSource |