summaryrefslogtreecommitdiffstats
path: root/python/tests/harvest_crossref.py
diff options
context:
space:
mode:
authorMartin Czygan <martin@archive.org>2019-12-09 19:41:09 +0000
committerMartin Czygan <martin@archive.org>2019-12-09 19:41:09 +0000
commitd6bc26046bbbe7bd76f4740b12170b1b73e6d264 (patch)
tree6a9a7803bd955740b88e82d5687d29111d2ffa6e /python/tests/harvest_crossref.py
parente5a1738b67c098ad61257c4b872ecdb3f6ad74a8 (diff)
parent7cba221ba4876bd7c011c6a46dc86c4494218366 (diff)
downloadfatcat-d6bc26046bbbe7bd76f4740b12170b1b73e6d264.tar.gz
fatcat-d6bc26046bbbe7bd76f4740b12170b1b73e6d264.zip
Merge branch 'bnewbold-crossref-harvest-test' into 'master'
Basic mocked test for crossref harvester See merge request webgroup/fatcat!7
Diffstat (limited to 'python/tests/harvest_crossref.py')
-rw-r--r--python/tests/harvest_crossref.py45
1 files changed, 45 insertions, 0 deletions
diff --git a/python/tests/harvest_crossref.py b/python/tests/harvest_crossref.py
new file mode 100644
index 00000000..52aa7b81
--- /dev/null
+++ b/python/tests/harvest_crossref.py
@@ -0,0 +1,45 @@
+
+import json
+import pytest
+import datetime
+import responses
+from fatcat_tools.harvest import *
+
+
+@responses.activate
+def test_crossref_harvest_date(mocker):
+    """Check one mocked day-fetch: request headers/params and producer calls."""
+    # mock out the harvest state object so it doesn't try to actually connect
+    # to Kafka
+    mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+
+    # mock day request to crossref API (only the fixture's first line is parsed)
+    with open('tests/files/crossref_api_works.json', 'r') as f:
+        crossref_resp = json.loads(f.readline())
+    responses.add(responses.GET, 'https://api.crossref.org/works',
+        json=crossref_resp, status=200)
+
+    harvester = HarvestCrossrefWorker(
+        kafka_hosts="dummy",
+        produce_topic="dummy-produce-topic",
+        state_topic="dummy-state-topic",
+        contact_email="test@fatcat.wiki",
+    )
+
+    harvester.producer = mocker.Mock()
+
+    harvester.fetch_date(datetime.date(2019, 2, 3))
+
+    assert len(responses.calls) == 1
+
+    # ensure email was included in User-Agent
+    assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers['User-Agent']
+
+    # check that correct date param was passed as expected
+    assert "filter=from-index-date%3A2019-02-03" in responses.calls[0].request.url
+
+    # check that the expected number of DOI objects were published
+    # to the (mock) kafka topic
+    assert harvester.producer.produce.call_count == 3
+    assert harvester.producer.flush.call_count == 1
+    harvester.producer.poll.assert_called_once_with(0)  # was a truthy no-op attribute