diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 3 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 4 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 3 | 
3 files changed, 5 insertions, 5 deletions
| diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py index 7017c56c..acfc2b87 100644 --- a/python/fatcat_tools/importers/arabesque.py +++ b/python/fatcat_tools/importers/arabesque.py @@ -42,8 +42,7 @@ class ArabesqueMatchImporter(EntityImporter):      def __init__(self, api, extid_type, require_grobid=True, **kwargs): -        eg_desc = kwargs.get('editgroup_description', -            "Match web crawl files to releases based on identifier/URL seedlist") +        eg_desc = kwargs.get('editgroup_description', None) or "Match web crawl files to releases based on identifier/URL seedlist"          eg_extra = kwargs.get('editgroup_extra', dict())          eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.ArabesqueMatchImporter')          if kwargs.get('crawl_id'): diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 4a3cd648..13b1e5b8 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -6,6 +6,7 @@ import json  import ftfy  import base64  import sqlite3 +import datetime  import subprocess  import unicodedata  from collections import Counter @@ -756,10 +757,11 @@ class KafkaJsonPusher(RecordPusher):              print("... got {} kafka messages ({}sec poll interval)".format(                  len(batch), self.poll_interval))              if not batch: -                if datetime.datetime.now() - last_push > datetime.timedelta(minutes=5): +                if datetime.datetime.now() - last_push > datetime.timedelta(seconds=30): #XXX minutes=5                      # it has been some time, so flush any current editgroup                      self.importer.finish()                      last_push = datetime.datetime.now() +                    #print("Flushed any partial import batch: {}".format(self.importer.counts))                  continue              # first check errors on entire batch...              for msg in batch: diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index dbb78ec9..180d7ba3 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -35,8 +35,7 @@ class MatchedImporter(EntityImporter):      def __init__(self, api, **kwargs): -        eg_desc = kwargs.pop('editgroup_description', -            "Import of large-scale file-to-release match results. Source of metadata varies.") +        eg_desc = kwargs.pop('editgroup_description', None) or "Import of large-scale file-to-release match results. Source of metadata varies."          eg_extra = kwargs.pop('editgroup_extra', dict())          eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.MatchedImporter')          super().__init__(api, | 
