diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/harvest/oaipmh.py | 27 | 
1 file changed, 14 insertions, 13 deletions
| diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py index f5e3fe7a..4044ff10 100644 --- a/python/fatcat_tools/harvest/oaipmh.py +++ b/python/fatcat_tools/harvest/oaipmh.py @@ -86,22 +86,23 @@ class HarvestOaiPmhWorker:          produce_topic = self.kafka.topics[self.produce_topic]          # this dict kwargs hack is to work around 'from' as a reserved python keyword          # recommended by sickle docs -        records = api.ListRecords(**{ -            'metadataPrefix': self.metadata_prefix, -            'from': date_str, -            'until': date_str, -        }) +        try: +            records = api.ListRecords(**{ +                'metadataPrefix': self.metadata_prefix, +                'from': date_str, +                'until': date_str, +            }) +        except sickle.oaiexceptions.NoRecordsMatch: +            print("WARN: no OAI-PMH records for this date: {} (UTC)".format(date_str)) +            return          count = 0          with produce_topic.get_producer() as producer: -            try: -                for item in records: -                    count += 1 -                    if count % 50 == 0: -                        print("... up to {}".format(count)) -                    producer.produce(item.raw.encode('utf-8'), partition_key=item.header.identifier.encode('utf-8')) -            except sickle.oaiexceptions.NoRecordsMatch: -                print("WARN: no OAI-PMH records for this date: {} (UTC)".format(date_str)) +            for item in records: +                count += 1 +                if count % 50 == 0: +                    print("... up to {}".format(count)) +                producer.produce(item.raw.encode('utf-8'), partition_key=item.header.identifier.encode('utf-8'))      def run(self, continuous=False): | 
