diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-12 22:09:28 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-12 22:09:28 -0700 |
commit | c63b663e2fcfb0c4544653f7f30f7a548103ef2b (patch) | |
tree | 2ad6c21e62d087b45fc95345cf507cee5cb5e9d2 /python/fatcat_tools | |
parent | b3ca15488e3ffdad53dbadf2d2db2ee54b482216 (diff) | |
download | fatcat-c63b663e2fcfb0c4544653f7f30f7a548103ef2b.tar.gz fatcat-c63b663e2fcfb0c4544653f7f30f7a548103ef2b.zip |
add SqlitePusher importer option
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/__init__.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/common.py | 20 |
2 files changed, 21 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py index 2112785b..94802915 100644 --- a/python/fatcat_tools/importers/__init__.py +++ b/python/fatcat_tools/importers/__init__.py @@ -12,7 +12,7 @@ To run an import you combine two classes; one each of: """ -from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, KafkaJsonPusher, make_kafka_consumer, clean +from .common import EntityImporter, JsonLinePusher, LinePusher, CsvPusher, SqlitePusher, KafkaJsonPusher, make_kafka_consumer, clean from .crossref import CrossrefImporter, CROSSREF_TYPE_MAP from .grobid_metadata import GrobidMetadataImporter from .journal_metadata import JournalMetadataImporter diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index b89c3828..23cf2cc7 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -4,6 +4,7 @@ import sys import csv import json import ftfy +import sqlite3 import itertools import subprocess from collections import Counter @@ -419,6 +420,25 @@ class LinePusher(RecordPusher): return counts +class SqlitePusher(RecordPusher): + + def __init__(self, importer, db_file, table_name, where_clause="", **kwargs): + self.importer = importer + self.db = sqlite3.connect(db_file, isolation_level='EXCLUSIVE') + self.db.row_factory = sqlite3.Row + self.table_name = table_name + self.where_clause = where_clause + + def run(self): + cur = self.db.execute("SELECT * FROM {} {};".format( + self.table_name, self.where_clause)) + for row in cur: + self.importer.push_record(row) + counts = self.importer.finish() + print(counts) + return counts + + class KafkaJsonPusher(RecordPusher): def __init__(self, importer, kafka_hosts, kafka_env, topic_suffix, group, **kwargs): |