aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/harvest_pubmed.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-11-02 18:13:14 -0700
committerBryan Newbold <bnewbold@robocracy.org>2021-11-02 18:13:14 -0700
commitcdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (patch)
tree5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python/tests/harvest_pubmed.py
parent78f08280edea4ff65ca613ad30005c45cc48dea6 (diff)
downloadfatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.tar.gz
fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.zip
fmt (black): tests/
Diffstat (limited to 'python/tests/harvest_pubmed.py')
-rw-r--r--python/tests/harvest_pubmed.py31
1 files changed, 17 insertions, 14 deletions
diff --git a/python/tests/harvest_pubmed.py b/python/tests/harvest_pubmed.py
index aba766ba..422870f2 100644
--- a/python/tests/harvest_pubmed.py
+++ b/python/tests/harvest_pubmed.py
@@ -14,20 +14,20 @@ def test_pubmed_harvest_date(mocker):
# mock out the harvest state object so it doesn't try to actually connect
# to Kafka
- mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+ mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka")
# Mocking a file fetched from FTP, should contain some 'PubmedArticle' elements.
# $ zcat tests/files/pubmedsample_2019.xml.gz | grep -c '<PubmedArticle>'
# 176
- file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_2019.xml.gz')
- ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr')
+ file_to_retrieve = os.path.join(os.path.dirname(__file__), "files/pubmedsample_2019.xml.gz")
+ ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr")
ftpretr.return_value = file_to_retrieve
- test_date = '2020-02-20'
+ test_date = "2020-02-20"
# We'll need one entry in the date_file_map.
- generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map')
- generate_date_file_map.return_value = {test_date: set(['dummy'])}
+ generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map")
+ generate_date_file_map.return_value = {test_date: set(["dummy"])}
# For cleanup.
os.remove = mocker.Mock()
@@ -41,7 +41,7 @@ def test_pubmed_harvest_date(mocker):
harvester.producer = mocker.Mock()
harvester.date_file_map = generate_date_file_map()
# Since we mock out the FTP fetch, the concrete date does not matter here.
- harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d'))
+ harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d"))
# check that the expected number of DOI objects were published
# to the (mock) kafka topic
@@ -49,20 +49,23 @@ def test_pubmed_harvest_date(mocker):
assert harvester.producer.flush.call_count == 1
assert os.remove.call_count == 2
+
def test_pubmed_harvest_date_no_pmid(mocker):
# mock out the harvest state object so it doesn't try to actually connect
# to Kafka
- mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+ mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka")
- file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_no_pmid_2019.xml.gz')
- ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr')
+ file_to_retrieve = os.path.join(
+ os.path.dirname(__file__), "files/pubmedsample_no_pmid_2019.xml.gz"
+ )
+ ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr")
ftpretr.return_value = file_to_retrieve
- test_date = '2020-02-20'
+ test_date = "2020-02-20"
# We'll need one entry in the date_file_map.
- generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map')
- generate_date_file_map.return_value = {test_date: set(['dummy'])}
+ generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map")
+ generate_date_file_map.return_value = {test_date: set(["dummy"])}
harvester = PubmedFTPWorker(
kafka_hosts="dummy",
@@ -74,4 +77,4 @@ def test_pubmed_harvest_date_no_pmid(mocker):
# The file has no PMID, so it is not importable.
with pytest.raises(ValueError):
- harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d'))
+ harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d"))