summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r--python/fatcat_tools/importers/__init__.py2
-rw-r--r--python/fatcat_tools/importers/common.py20
2 files changed, 21 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py
index 2112785b..94802915 100644
--- a/python/fatcat_tools/importers/__init__.py
+++ b/python/fatcat_tools/importers/__init__.py
@@ -12,7 +12,7 @@ To run an import you combine two classes; one each of:
"""
-from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, KafkaJsonPusher, make_kafka_consumer, clean
+from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, SqlitePusher, KafkaJsonPusher, make_kafka_consumer, clean
from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP
from .grobid_metadata import GrobidMetadataImporter
from .journal_metadata import JournalMetadataImporter
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index b89c3828..23cf2cc7 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -4,6 +4,7 @@ import sys
import csv
import json
import ftfy
+import sqlite3
import itertools
import subprocess
from collections import Counter
@@ -419,6 +420,25 @@ class LinePusher(RecordPusher):
return counts
+class SqlitePusher(RecordPusher):
+
+ def __init__(self, importer, db_file, table_name, where_clause="", **kwargs):
+ self.importer = importer
+ self.db = sqlite3.connect(db_file, isolation_level='EXCLUSIVE')
+ self.db.row_factory = sqlite3.Row
+ self.table_name = table_name
+ self.where_clause = where_clause
+
+ def run(self):
+ cur = self.db.execute("SELECT * FROM {} {};".format(
+ self.table_name, self.where_clause))
+ for row in cur:
+ self.importer.push_record(row)
+ counts = self.importer.finish()
+ print(counts)
+ return counts
+
+
class KafkaJsonPusher(RecordPusher):
def __init__(self, importer, kafka_hosts, kafka_env, topic_suffix, group, **kwargs):