aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/__init__.py')
-rw-r--r--python/fatcat_tools/importers/__init__.py19
1 files changed, 17 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py
index e6f081e5..70f38f5b 100644
--- a/python/fatcat_tools/importers/__init__.py
+++ b/python/fatcat_tools/importers/__init__.py
@@ -1,7 +1,22 @@
-from .common import FatcatImporter, make_kafka_consumer
+"""
+To run an import you combine two classes; one each of:
+
+- RecordSource: somehow iterates over a source of raw records (eg, from a
+ database, Kafka, files on disk, stdin) and pushes into an entity importer.
+- EntityImporter: class that a record iterator pushes raw (unparsed) records
+ into. The entity importer parses and decides what to do (ignore, update,
+ insert, etc). There is usually a primary entity type, though related entities
+ can be created along the way. Maintains API connection and editgroup/batch
+ state.
+
+"""
+
+from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, KafkaJsonPusher, make_kafka_consumer, clean
from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP
from .grobid_metadata import GrobidMetadataImporter
-from .issn import IssnImporter
+from .journal_metadata import JournalMetadataImporter
from .matched import MatchedImporter
from .orcid import OrcidImporter
+#from .kafka_source import KafkaSource
+#from .file_source import FileSource