diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-12 17:50:12 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-12 19:49:50 -0800 |
commit | 91662c063b088bb665b86c252ecd27be9d9083be (patch) | |
tree | 6613d45fbfc7b63c67adff8b4f6aa1d531eda12b | |
parent | 0827b67a3f195f151a77ca01708e6c98daf778bf (diff) | |
download | fatcat-91662c063b088bb665b86c252ecd27be9d9083be.tar.gz fatcat-91662c063b088bb665b86c252ecd27be9d9083be.zip |
ensure importer description arg isn't clobbered
-rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 3 | ||||
-rw-r--r-- | python/fatcat_tools/importers/common.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/matched.py | 3 |
3 files changed, 5 insertions, 5 deletions
```diff
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index 7017c56c..acfc2b87 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -42,8 +42,7 @@ class ArabesqueMatchImporter(EntityImporter):
 
     def __init__(self, api, extid_type, require_grobid=True, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Match web crawl files to releases based on identifier/URL seedlist")
+        eg_desc = kwargs.get('editgroup_description', None) or "Match web crawl files to releases based on identifier/URL seedlist"
         eg_extra = kwargs.get('editgroup_extra', dict())
         eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.ArabesqueMatchImporter')
         if kwargs.get('crawl_id'):
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 4a3cd648..13b1e5b8 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -6,6 +6,7 @@ import json
 import ftfy
 import base64
 import sqlite3
+import datetime
 import subprocess
 import unicodedata
 from collections import Counter
@@ -756,10 +757,11 @@ class KafkaJsonPusher(RecordPusher):
             print("... got {} kafka messages ({}sec poll interval)".format(
                 len(batch), self.poll_interval))
             if not batch:
-                if datetime.datetime.now() - last_push > datetime.timedelta(minutes=5):
+                if datetime.datetime.now() - last_push > datetime.timedelta(seconds=30): #XXX minutes=5
                     # it has been some time, so flush any current editgroup
                     self.importer.finish()
                     last_push = datetime.datetime.now()
+                    #print("Flushed any partial import batch: {}".format(self.importer.counts))
                 continue
             # first check errors on entire batch...
             for msg in batch:
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index dbb78ec9..180d7ba3 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -35,8 +35,7 @@ class MatchedImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description',
-            "Import of large-scale file-to-release match results. Source of metadata varies.")
+        eg_desc = kwargs.pop('editgroup_description', None) or "Import of large-scale file-to-release match results. Source of metadata varies."
         eg_extra = kwargs.pop('editgroup_extra', dict())
         eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.MatchedImporter')
         super().__init__(api,
```