diff options
Diffstat (limited to 'python/tests/harvest_pubmed.py')
-rw-r--r-- | python/tests/harvest_pubmed.py | 31 |
1 files changed, 17 insertions, 14 deletions
diff --git a/python/tests/harvest_pubmed.py b/python/tests/harvest_pubmed.py index aba766ba..422870f2 100644 --- a/python/tests/harvest_pubmed.py +++ b/python/tests/harvest_pubmed.py @@ -14,20 +14,20 @@ def test_pubmed_harvest_date(mocker): # mock out the harvest state object so it doesn't try to actually connect # to Kafka - mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka') + mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka") # Mocking a file fetched from FTP, should contain some 'PubmedArticle' elements. # $ zcat tests/files/pubmedsample_2019.xml.gz | grep -c '<PubmedArticle>' # 176 - file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_2019.xml.gz') - ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr') + file_to_retrieve = os.path.join(os.path.dirname(__file__), "files/pubmedsample_2019.xml.gz") + ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr") ftpretr.return_value = file_to_retrieve - test_date = '2020-02-20' + test_date = "2020-02-20" # We'll need one entry in the date_file_map. - generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map') - generate_date_file_map.return_value = {test_date: set(['dummy'])} + generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map") + generate_date_file_map.return_value = {test_date: set(["dummy"])} # For cleanup. os.remove = mocker.Mock() @@ -41,7 +41,7 @@ def test_pubmed_harvest_date(mocker): harvester.producer = mocker.Mock() harvester.date_file_map = generate_date_file_map() # Since we mock out the FTP fetch, the concrete date does not matter here. - harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d')) + harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d")) # check that we published the expected number of DOI objects were published # to the (mock) kafka topic @@ -49,20 +49,23 @@ def test_pubmed_harvest_date(mocker): assert harvester.producer.flush.call_count == 1 assert os.remove.call_count == 2 + def test_pubmed_harvest_date_no_pmid(mocker): # mock out the harvest state object so it doesn't try to actually connect # to Kafka - mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka') + mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka") - file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_no_pmid_2019.xml.gz') - ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr') + file_to_retrieve = os.path.join( + os.path.dirname(__file__), "files/pubmedsample_no_pmid_2019.xml.gz" + ) + ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr") ftpretr.return_value = file_to_retrieve - test_date = '2020-02-20' + test_date = "2020-02-20" # We'll need one entry in the date_file_map. - generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map') - generate_date_file_map.return_value = {test_date: set(['dummy'])} + generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map") + generate_date_file_map.return_value = {test_date: set(["dummy"])} harvester = PubmedFTPWorker( kafka_hosts="dummy", @@ -74,4 +77,4 @@ def test_pubmed_harvest_date_no_pmid(mocker): # The file has not PMID, not importable. with pytest.raises(ValueError): - harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d')) + harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d")) |