From a2086616c23320153eacec7e4f0d3c6e1c6d7790 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 22 Jan 2019 18:28:41 -0800 Subject: new importer API interfaces --- python/fatcat_tools/importers/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'python/fatcat_tools/importers/__init__.py') diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py index 47fc1fd3..f2caca5c 100644 --- a/python/fatcat_tools/importers/__init__.py +++ b/python/fatcat_tools/importers/__init__.py @@ -1,7 +1,22 @@ +""" +To run an import you combine two classes, one of each kind: + +- RecordSource: iterates over a source of raw records (e.g., from a + database, Kafka, files on disk, stdin) and pushes them into an entity importer. +- EntityImporter: class that a RecordSource pushes raw (unparsed) records + into. The entity importer parses each record and decides what to do (ignore, + update, insert, etc.). There is usually a primary entity type, though related + entities can be created along the way. It maintains the API connection and + editgroup/batch state. + +""" + from .common import FatcatImporter, make_kafka_consumer from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP from .grobid_metadata import GrobidMetadataImporter from .journal_metadata import JournalMetadataImporter from .matched import MatchedImporter from .orcid import OrcidImporter +from .kafka_source import KafkaSource +from .file_source import FileSource -- cgit v1.2.3