aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/__init__.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-01-22 18:28:41 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-01-22 18:28:41 -0800
commit: a2086616c23320153eacec7e4f0d3c6e1c6d7790 (patch)
tree: 6b6b563e776ba7bc02eb73df6ddeae318f10575b /python/fatcat_tools/importers/__init__.py
parent: 9ab88508ed710de9db06a27436042ac30a70676e (diff)
download: fatcat-a2086616c23320153eacec7e4f0d3c6e1c6d7790.tar.gz
fatcat-a2086616c23320153eacec7e4f0d3c6e1c6d7790.zip
new importer API interfaces
Diffstat (limited to 'python/fatcat_tools/importers/__init__.py')
-rw-r--r-- python/fatcat_tools/importers/__init__.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py
index 47fc1fd3..f2caca5c 100644
--- a/python/fatcat_tools/importers/__init__.py
+++ b/python/fatcat_tools/importers/__init__.py
@@ -1,7 +1,22 @@
+"""
+To run an import, you combine two classes, one of each kind:
+
+- RecordSource: iterates over a source of raw records (e.g., a database,
+ Kafka, files on disk, or stdin) and pushes each one into an entity importer.
+- EntityImporter: class that a RecordSource pushes raw (unparsed) records
+ into. The entity importer parses each record and decides what to do with it
+ (ignore, update, insert, etc). There is usually a primary entity type, though
+ related entities can be created along the way. It also maintains the API
+ connection and editgroup/batch state.
+
+"""
+
from .common import FatcatImporter, make_kafka_consumer
from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP
from .grobid_metadata import GrobidMetadataImporter
from .journal_metadata import JournalMetadataImporter
from .matched import MatchedImporter
from .orcid import OrcidImporter
+from .kafka_source import KafkaSource
+from .file_source import FileSource