aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/__init__.py')
-rw-r--r--python/fatcat_tools/importers/__init__.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py
index 47fc1fd3..f2caca5c 100644
--- a/python/fatcat_tools/importers/__init__.py
+++ b/python/fatcat_tools/importers/__init__.py
@@ -1,7 +1,22 @@
+"""
+To run an import you combine two classes; one each of:
+
+- RecordSource: somehow iterates over a source of raw records (eg, from a
+ database, Kafka, files on disk, stdin) and pushes into an entity importer.
+- EntityImporter: class that a record iterator pushes raw (unparsed) records
+ into. The entity importer parses and decides what to do (ignore, update,
+ insert, etc). There is usually a primary entity type, though related entities
+ can be created along the way. Maintains API connection and editgroup/batch
+ state.
+
+"""
+
from .common import FatcatImporter, make_kafka_consumer
from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP
from .grobid_metadata import GrobidMetadataImporter
from .journal_metadata import JournalMetadataImporter
from .matched import MatchedImporter
from .orcid import OrcidImporter
+from .kafka_source import KafkaSource
+from .file_source import FileSource