From 5b0d7c1cd40a7a255886703c00d060eacfc27901 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 21 Nov 2018 10:06:26 -0800 Subject: fix oai-pmh issue again --- python/fatcat_tools/harvest/oaipmh.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'python') diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py index f5e3fe7a..4044ff10 100644 --- a/python/fatcat_tools/harvest/oaipmh.py +++ b/python/fatcat_tools/harvest/oaipmh.py @@ -86,22 +86,23 @@ class HarvestOaiPmhWorker: produce_topic = self.kafka.topics[self.produce_topic] # this dict kwargs hack is to work around 'from' as a reserved python keyword # recommended by sickle docs - records = api.ListRecords(**{ - 'metadataPrefix': self.metadata_prefix, - 'from': date_str, - 'until': date_str, - }) + try: + records = api.ListRecords(**{ + 'metadataPrefix': self.metadata_prefix, + 'from': date_str, + 'until': date_str, + }) + except sickle.oaiexceptions.NoRecordsMatch: + print("WARN: no OAI-PMH records for this date: {} (UTC)".format(date_str)) + return count = 0 with produce_topic.get_producer() as producer: - try: - for item in records: - count += 1 - if count % 50 == 0: - print("... up to {}".format(count)) - producer.produce(item.raw.encode('utf-8'), partition_key=item.header.identifier.encode('utf-8')) - except sickle.oaiexceptions.NoRecordsMatch: - print("WARN: no OAI-PMH records for this date: {} (UTC)".format(date_str)) + for item in records: + count += 1 + if count % 50 == 0: + print("... up to {}".format(count)) + producer.produce(item.raw.encode('utf-8'), partition_key=item.header.identifier.encode('utf-8')) def run(self, continuous=False): -- cgit v1.2.3