diff options
Diffstat (limited to 'python/fatcat_tools')
| -rw-r--r-- | python/fatcat_tools/importers/__init__.py | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 20 | 
2 files changed, 21 insertions, 1 deletions
| diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py index 2112785b..94802915 100644 --- a/python/fatcat_tools/importers/__init__.py +++ b/python/fatcat_tools/importers/__init__.py @@ -12,7 +12,7 @@ To run an import you combine two classes; one each of:  """ -from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, KafkaJsonPusher, make_kafka_consumer, clean +from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, SqlitePusher, KafkaJsonPusher, make_kafka_consumer, clean  from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP  from .grobid_metadata import GrobidMetadataImporter  from .journal_metadata import JournalMetadataImporter diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index b89c3828..23cf2cc7 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -4,6 +4,7 @@ import sys  import csv  import json  import ftfy +import sqlite3  import itertools  import subprocess  from collections import Counter @@ -419,6 +420,25 @@ class LinePusher(RecordPusher):          return counts +class SqlitePusher(RecordPusher): + +    def __init__(self, importer, db_file, table_name, where_clause="", **kwargs): +        self.importer = importer +        self.db = sqlite3.connect(db_file, isolation_level='EXCLUSIVE') +        self.db.row_factory = sqlite3.Row +        self.table_name = table_name +        self.where_clause = where_clause + +    def run(self): +        cur = self.db.execute("SELECT * FROM {} {};".format( +            self.table_name, self.where_clause)) +        for row in cur: +            self.importer.push_record(row) +        counts = self.importer.finish() +        print(counts) +        return counts + +  class KafkaJsonPusher(RecordPusher):      def __init__(self, importer, kafka_hosts, kafka_env, topic_suffix, group, **kwargs): | 
