From 30905f1effb33c3ef193d084120aa3fbd75d0b9b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 1 Jul 2020 18:35:24 -0700 Subject: lint (flake8) tool python files --- python/fatcat_tools/harvest/doi_registrars.py | 7 ------- python/fatcat_tools/harvest/harvest_common.py | 8 +++++--- python/fatcat_tools/harvest/oaipmh.py | 8 -------- python/fatcat_tools/harvest/pubmed.py | 2 +- 4 files changed, 6 insertions(+), 19 deletions(-) (limited to 'python/fatcat_tools/harvest') diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py index 37628f09..2554fe96 100644 --- a/python/fatcat_tools/harvest/doi_registrars.py +++ b/python/fatcat_tools/harvest/doi_registrars.py @@ -1,16 +1,10 @@ -import re import sys -import csv import json import time -import itertools -import datetime -import requests from confluent_kafka import Producer, KafkaException from urllib.parse import urlparse, parse_qs -from fatcat_tools.workers import most_recent_message from .harvest_common import HarvestState, requests_retry_session @@ -64,7 +58,6 @@ class HarvestCrossrefWorker: to be careful how state is serialized back into kafka. """ - def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email, api_host_url="https://api.crossref.org/works", start_date=None, end_date=None): diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py index 27ab8b4a..bdae3054 100644 --- a/python/fatcat_tools/harvest/harvest_common.py +++ b/python/fatcat_tools/harvest/harvest_common.py @@ -1,15 +1,13 @@ import sys import json -import time import datetime import requests from requests.adapters import HTTPAdapter # unclear why pylint chokes on this import. Recent 'requests' and 'urllib3' are # in Pipenv.lock, and there are no errors in QA from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error -from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException, \ - OFFSET_BEGINNING +from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException # Used for parsing ISO date format (YYYY-MM-DD) @@ -130,9 +128,11 @@ class HarvestState: }).encode('utf-8') if kafka_topic: assert(kafka_config) + def fail_fast(err, msg): if err: raise KafkaException(err) + print("Committing status to Kafka: {}".format(kafka_topic), file=sys.stderr) producer_conf = kafka_config.copy() producer_conf.update({ @@ -159,9 +159,11 @@ class HarvestState: return print("Fetching state from kafka topic: {}".format(kafka_topic), file=sys.stderr) + def fail_fast(err, msg): if err: raise KafkaException(err) + conf = kafka_config.copy() conf.update({ 'group.id': 'dummy_init_group', # should never be committed diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py index d30f9507..a7dc3d8c 100644 --- a/python/fatcat_tools/harvest/oaipmh.py +++ b/python/fatcat_tools/harvest/oaipmh.py @@ -1,16 +1,9 @@ -import re import sys -import csv -import json import time -import itertools -import datetime -import requests import sickle from confluent_kafka import Producer, KafkaException -from fatcat_tools.workers import most_recent_message from .harvest_common import HarvestState @@ -31,7 +24,6 @@ class HarvestOaiPmhWorker: would want something similar operationally. Oh well! """ - def __init__(self, kafka_hosts, produce_topic, state_topic, start_date=None, end_date=None): diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py index f6301b8d..802d31d8 100644 --- a/python/fatcat_tools/harvest/pubmed.py +++ b/python/fatcat_tools/harvest/pubmed.py @@ -19,7 +19,7 @@ import tempfile import time import xml.etree.ElementTree as ET from ftplib import FTP -from urllib.parse import urljoin, urlparse +from urllib.parse import urlparse import dateparser from bs4 import BeautifulSoup -- cgit v1.2.3