From 91662c063b088bb665b86c252ecd27be9d9083be Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 12 Dec 2019 17:50:12 -0800 Subject: ensure importer description arg isn't clobbered --- python/fatcat_tools/importers/arabesque.py | 3 +-- python/fatcat_tools/importers/common.py | 4 +++- python/fatcat_tools/importers/matched.py | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py index 7017c56c..acfc2b87 100644 --- a/python/fatcat_tools/importers/arabesque.py +++ b/python/fatcat_tools/importers/arabesque.py @@ -42,8 +42,7 @@ class ArabesqueMatchImporter(EntityImporter): def __init__(self, api, extid_type, require_grobid=True, **kwargs): - eg_desc = kwargs.get('editgroup_description', - "Match web crawl files to releases based on identifier/URL seedlist") + eg_desc = kwargs.get('editgroup_description', None) or "Match web crawl files to releases based on identifier/URL seedlist" eg_extra = kwargs.get('editgroup_extra', dict()) eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.ArabesqueMatchImporter') if kwargs.get('crawl_id'): diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 4a3cd648..13b1e5b8 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -6,6 +6,7 @@ import json import ftfy import base64 import sqlite3 +import datetime import subprocess import unicodedata from collections import Counter @@ -756,10 +757,11 @@ class KafkaJsonPusher(RecordPusher): print("... got {} kafka messages ({}sec poll interval)".format( len(batch), self.poll_interval)) if not batch: - if datetime.datetime.now() - last_push > datetime.timedelta(minutes=5): + if datetime.datetime.now() - last_push > datetime.timedelta(seconds=30): #XXX minutes=5 # it has been some time, so flush any current editgroup self.importer.finish() last_push = datetime.datetime.now() + #print("Flushed any partial import batch: {}".format(self.importer.counts)) continue # first check errors on entire batch... for msg in batch: diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index dbb78ec9..180d7ba3 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -35,8 +35,7 @@ class MatchedImporter(EntityImporter): def __init__(self, api, **kwargs): - eg_desc = kwargs.pop('editgroup_description', - "Import of large-scale file-to-release match results. Source of metadata varies.") + eg_desc = kwargs.pop('editgroup_description', None) or "Import of large-scale file-to-release match results. Source of metadata varies." eg_extra = kwargs.pop('editgroup_extra', dict()) eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.MatchedImporter') super().__init__(api, -- cgit v1.2.3