aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/fatcat_tools/harvest/doi_registrars.py32
-rw-r--r--python/tests/files/datacite_api.json1
-rw-r--r--python/tests/harvest_datacite.py45
3 files changed, 73 insertions, 5 deletions
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 13abb2e6..33f44600 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -8,6 +8,7 @@ import itertools
import datetime
import requests
from confluent_kafka import Producer, KafkaException
+from urllib.parse import urlparse, parse_qs
from fatcat_tools.workers import most_recent_message
from .harvest_common import HarvestState, requests_retry_session
@@ -121,6 +122,10 @@ class HarvestCrossrefWorker:
self.producer.poll(0)
time.sleep(30.0)
continue
+ if http_resp.status_code == 400:
+ print("skipping batch for {}, due to HTTP 400. Marking complete. Related: https://github.com/datacite/datacite/issues/897".format(date_str),
+ file=sys.stderr)
+ break
http_resp.raise_for_status()
resp = http_resp.json()
items = self.extract_items(resp)
@@ -179,7 +184,7 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
"""
def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
- api_host_url="https://api.datacite.org/works",
+ api_host_url="https://api.datacite.org/dois",
start_date=None, end_date=None):
super().__init__(kafka_hosts=kafka_hosts,
produce_topic=produce_topic,
@@ -193,11 +198,13 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
self.name = "Datacite"
def params(self, date_str):
+ """
+ Dates have to be supplied in 2018-10-27T22:36:30.000Z format.
+ """
return {
- 'from-update-date': date_str,
- 'until-update-date': date_str,
+ 'query': 'updated:[{}T00:00:00.000Z TO {}T23:59:59.999Z]'.format(date_str, date_str),
'page[size]': self.api_batch_size,
- 'page[number]': 1,
+ 'page[cursor]': 1,
}
def extract_items(self, resp):
@@ -210,5 +217,20 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
return obj['attributes']['doi'].encode('utf-8')
def update_params(self, params, resp):
- params['page[number]'] = resp['meta']['page'] + 1
+ """
+ Using cursor mechanism (https://support.datacite.org/docs/pagination#section-cursor).
+
+ $ curl -sL https://is.gd/cLbE5h | jq -r .links.next
+
+ Example: https://is.gd/cLbE5h
+
+ Further API errors reported:
+ https://github.com/datacite/datacite/issues/897 (HTTP 400)
+ https://github.com/datacite/datacite/issues/898 (HTTP 500)
+ """
+ parsed = urlparse(resp['links']['next'])
+ page_cursor = parse_qs(parsed.query).get('page[cursor]')
+ if not page_cursor:
+ raise ValueError('no page[cursor] in .links.next')
+ params['page[cursor]'] = page_cursor[0]
return params
diff --git a/python/tests/files/datacite_api.json b/python/tests/files/datacite_api.json
new file mode 100644
index 00000000..7852d730
--- /dev/null
+++ b/python/tests/files/datacite_api.json
@@ -0,0 +1 @@
+{"data":[{"id":"10.3204/desy-2014-00864","type":"dois","attributes":{"doi":"10.3204/desy-2014-00864","identifiers":[{"identifier":"https://doi.org/10.3204/desy-2014-00864","identifierType":"DOI"}],"creators":[{"name":"Abdo, A. A.","nameType":"Personal","givenName":"A. A.","familyName":"Abdo","affiliation":[]},{"name":"Ajello, M.","nameType":"Personal","givenName":"M.","familyName":"Ajello","affiliation":[]},{"name":"Allafort, A.","nameType":"Personal","givenName":"A.","familyName":"Allafort","affiliation":[]},{"name":"Baldini, L.","nameType":"Personal","givenName":"L.","familyName":"Baldini","affiliation":[]},{"name":"Ballet, J.","nameType":"Personal","givenName":"J.","familyName":"Ballet","affiliation":[]},{"name":"Barbiellini, G.","nameType":"Personal","givenName":"G.","familyName":"Barbiellini","affiliation":[]},{"name":"Baring, M. G.","nameType":"Personal","givenName":"M. G.","familyName":"Baring","affiliation":[]},{"name":"Bastieri, D.","nameType":"Personal","givenName":"D.","familyName":"Bastieri","affiliation":[]},{"name":"Belfiore, A.","nameType":"Personal","givenName":"A.","familyName":"Belfiore","affiliation":[]},{"name":"Bellazzini, R.","nameType":"Personal","givenName":"R.","familyName":"Bellazzini","affiliation":[]},{"name":"Bhattacharyya, B.","nameType":"Personal","givenName":"B.","familyName":"Bhattacharyya","affiliation":[]},{"name":"Bissaldi, E.","nameType":"Personal","givenName":"E.","familyName":"Bissaldi","affiliation":[]},{"name":"Bloom, E. D.","nameType":"Personal","givenName":"E. D.","familyName":"Bloom","affiliation":[]},{"name":"Bonamente, E.","nameType":"Personal","givenName":"E.","familyName":"Bonamente","affiliation":[]},{"name":"Bottacini, E.","nameType":"Personal","givenName":"E.","familyName":"Bottacini","affiliation":[]},{"name":"Brandt, T. J.","nameType":"Personal","givenName":"T. 
J.","familyName":"Brandt","affiliation":[]},{"name":"Bregeon, J.","nameType":"Personal","givenName":"J.","familyName":"Bregeon","affiliation":[]},{"name":"Brigida, M.","nameType":"Personal","givenName":"M.","familyName":"Brigida","affiliation":[]},{"name":"Bruel, P.","nameType":"Personal","givenName":"P.","familyName":"Bruel","affiliation":[]},{"name":"Buehler, R.","nameType":"Personal","givenName":"R.","familyName":"Buehler","affiliation":["Deutsches Elektronen-Synchrotron"]},{"name":"Burgay, M.","nameType":"Personal","givenName":"M.","familyName":"Burgay","affiliation":[]},{"name":"Burnett, T. H.","nameType":"Personal","givenName":"T. H.","familyName":"Burnett","affiliation":[]},{"name":"Busetto, G.","nameType":"Personal","givenName":"G.","familyName":"Busetto","affiliation":[]},{"name":"Buson, S.","nameType":"Personal","givenName":"S.","familyName":"Buson","affiliation":[]},{"name":"Caliandro, G. A.","nameType":"Personal","givenName":"G. A.","familyName":"Caliandro","affiliation":[]},{"name":"Cameron, R. A.","nameType":"Personal","givenName":"R. A.","familyName":"Cameron","affiliation":[]},{"name":"Camilo, F.","nameType":"Personal","givenName":"F.","familyName":"Camilo","affiliation":[]},{"name":"Caraveo, P. A.","nameType":"Personal","givenName":"P. A.","familyName":"Caraveo","affiliation":[]},{"name":"Casandjian, J. M.","nameType":"Personal","givenName":"J. M.","familyName":"Casandjian","affiliation":[]},{"name":"Cecchi, C.","nameType":"Personal","givenName":"C.","familyName":"Cecchi","affiliation":[]},{"name":"Çelik, Ö.","nameType":"Personal","givenName":"Ö.","familyName":"Çelik","affiliation":[]},{"name":"Charles, E.","nameType":"Personal","givenName":"E.","familyName":"Charles","affiliation":[]},{"name":"Chaty, S.","nameType":"Personal","givenName":"S.","familyName":"Chaty","affiliation":[]},{"name":"Chaves, R. C. G.","nameType":"Personal","givenName":"R. C. 
G.","familyName":"Chaves","affiliation":[]},{"name":"Chekhtman, A.","nameType":"Personal","givenName":"A.","familyName":"Chekhtman","affiliation":[]},{"name":"Chen, A. W.","nameType":"Personal","givenName":"A. W.","familyName":"Chen","affiliation":[]},{"name":"Chiang, J.","nameType":"Personal","givenName":"J.","familyName":"Chiang","affiliation":[]},{"name":"Chiaro, G.","nameType":"Personal","givenName":"G.","familyName":"Chiaro","affiliation":[]},{"name":"Ciprini, S.","nameType":"Personal","givenName":"S.","familyName":"Ciprini","affiliation":[]},{"name":"Claus, R.","nameType":"Personal","givenName":"R.","familyName":"Claus","affiliation":[]},{"name":"Cognard, I.","nameType":"Personal","givenName":"I.","familyName":"Cognard","affiliation":[]},{"name":"Cohen-Tanugi, J.","nameType":"Personal","givenName":"J.","familyName":"Cohen-Tanugi","affiliation":[]},{"name":"Cominsky, L. R.","nameType":"Personal","givenName":"L. R.","familyName":"Cominsky","affiliation":[]},{"name":"Conrad, J.","nameType":"Personal","givenName":"J.","familyName":"Conrad","affiliation":[]},{"name":"Cutini, S.","nameType":"Personal","givenName":"S.","familyName":"Cutini","affiliation":[]},{"name":"D'Ammando, F.","nameType":"Personal","givenName":"F.","familyName":"D'Ammando","affiliation":[]},{"name":"De Angelis, A.","nameType":"Personal","givenName":"A.","familyName":"De Angelis","affiliation":[]},{"name":"DeCesar, M. E.","nameType":"Personal","givenName":"M. E.","familyName":"DeCesar","affiliation":[]},{"name":"De Luca, A.","nameType":"Personal","givenName":"A.","familyName":"De Luca","affiliation":[]},{"name":"Den Hartog, P. R.","nameType":"Personal","givenName":"P. R.","familyName":"Den Hartog","affiliation":[]},{"name":"De Palma, F.","nameType":"Personal","givenName":"F.","familyName":"De Palma","affiliation":[]},{"name":"Dermer, C. D.","nameType":"Personal","givenName":"C. 
D.","familyName":"Dermer","affiliation":[]},{"name":"Desvignes, G.","nameType":"Personal","givenName":"G.","familyName":"Desvignes","affiliation":[]},{"name":"Digel, S. W.","nameType":"Personal","givenName":"S. W.","familyName":"Digel","affiliation":[]},{"name":"Di Venere, L.","nameType":"Personal","givenName":"L.","familyName":"Di Venere","affiliation":[]},{"name":"Drell, P. S.","nameType":"Personal","givenName":"P. S.","familyName":"Drell","affiliation":[]},{"name":"Drlica-Wagner, A.","nameType":"Personal","givenName":"A.","familyName":"Drlica-Wagner","affiliation":[]},{"name":"Dubois, R.","nameType":"Personal","givenName":"R.","familyName":"Dubois","affiliation":[]},{"name":"Dumora, D.","nameType":"Personal","givenName":"D.","familyName":"Dumora","affiliation":[]},{"name":"Espinoza, C. M.","nameType":"Personal","givenName":"C. M.","familyName":"Espinoza","affiliation":[]},{"name":"Falletti, L.","nameType":"Personal","givenName":"L.","familyName":"Falletti","affiliation":[]},{"name":"Favuzzi, C.","nameType":"Personal","givenName":"C.","familyName":"Favuzzi","affiliation":[]},{"name":"Ferrara, E. C.","nameType":"Personal","givenName":"E. C.","familyName":"Ferrara","affiliation":[]},{"name":"Focke, W. B.","nameType":"Personal","givenName":"W. B.","familyName":"Focke","affiliation":[]},{"name":"Franckowiak, A.","nameType":"Personal","givenName":"A.","familyName":"Franckowiak","affiliation":[]},{"name":"Freire, P. C. C.","nameType":"Personal","givenName":"P. C. 
C.","familyName":"Freire","affiliation":[]},{"name":"Funk, S.","nameType":"Personal","givenName":"S.","familyName":"Funk","affiliation":[]},{"name":"Fusco, P.","nameType":"Personal","givenName":"P.","familyName":"Fusco","affiliation":[]},{"name":"Gargano, F.","nameType":"Personal","givenName":"F.","familyName":"Gargano","affiliation":[]},{"name":"Gasparrini, D.","nameType":"Personal","givenName":"D.","familyName":"Gasparrini","affiliation":[]},{"name":"Germani, S.","nameType":"Personal","givenName":"S.","familyName":"Germani","affiliation":[]},{"name":"Giglietto, N.","nameType":"Personal","givenName":"N.","familyName":"Giglietto","affiliation":[]},{"name":"Giommi, P.","nameType":"Personal","givenName":"P.","familyName":"Giommi","affiliation":[]},{"name":"Giordano, F.","nameType":"Personal","givenName":"F.","familyName":"Giordano","affiliation":[]},{"name":"Giroletti, M.","nameType":"Personal","givenName":"M.","familyName":"Giroletti","affiliation":[]},{"name":"Glanzman, T.","nameType":"Personal","givenName":"T.","familyName":"Glanzman","affiliation":[]},{"name":"Godfrey, G.","nameType":"Personal","givenName":"G.","familyName":"Godfrey","affiliation":[]},{"name":"Gotthelf, E. V.","nameType":"Personal","givenName":"E. V.","familyName":"Gotthelf","affiliation":[]},{"name":"Grenier, I. A.","nameType":"Personal","givenName":"I. A.","familyName":"Grenier","affiliation":[]},{"name":"Grondin, M.-H.","nameType":"Personal","givenName":"M.-H.","familyName":"Grondin","affiliation":[]},{"name":"Grove, J. E.","nameType":"Personal","givenName":"J. 
E.","familyName":"Grove","affiliation":[]},{"name":"Guillemot, L.","nameType":"Personal","givenName":"L.","familyName":"Guillemot","affiliation":[]},{"name":"Guiriec, S.","nameType":"Personal","givenName":"S.","familyName":"Guiriec","affiliation":[]},{"name":"Hadasch, D.","nameType":"Personal","givenName":"D.","familyName":"Hadasch","affiliation":[]},{"name":"Hanabata, Y.","nameType":"Personal","givenName":"Y.","familyName":"Hanabata","affiliation":[]},{"name":"Harding, A. K.","nameType":"Personal","givenName":"A. K.","familyName":"Harding","affiliation":[]},{"name":"Hayashida, M.","nameType":"Personal","givenName":"M.","familyName":"Hayashida","affiliation":[]},{"name":"Hays, E.","nameType":"Personal","givenName":"E.","familyName":"Hays","affiliation":[]},{"name":"Hessels, J.","nameType":"Personal","givenName":"J.","familyName":"Hessels","affiliation":[]},{"name":"Hewitt, J.","nameType":"Personal","givenName":"J.","familyName":"Hewitt","affiliation":[]},{"name":"Hill, A. B.","nameType":"Personal","givenName":"A. B.","familyName":"Hill","affiliation":[]},{"name":"Horan, D.","nameType":"Personal","givenName":"D.","familyName":"Horan","affiliation":[]},{"name":"Hou, X.","nameType":"Personal","givenName":"X.","familyName":"Hou","affiliation":[]},{"name":"Hughes, R. E.","nameType":"Personal","givenName":"R. E.","familyName":"Hughes","affiliation":[]},{"name":"Jackson, M. S.","nameType":"Personal","givenName":"M. S.","familyName":"Jackson","affiliation":[]},{"name":"Janssen, G. H.","nameType":"Personal","givenName":"G. H.","familyName":"Janssen","affiliation":[]},{"name":"Jogler, T.","nameType":"Personal","givenName":"T.","familyName":"Jogler","affiliation":[]},{"name":"Jóhannesson, G.","nameType":"Personal","givenName":"G.","familyName":"Jóhannesson","affiliation":[]},{"name":"Johnson, R. P.","nameType":"Personal","givenName":"R. P.","familyName":"Johnson","affiliation":[]},{"name":"Johnson, A. S.","nameType":"Personal","givenName":"A. 
S.","familyName":"Johnson","affiliation":[]},{"name":"Johnson, T. J.","nameType":"Personal","givenName":"T. J.","familyName":"Johnson","affiliation":[]},{"name":"Johnson, W. N.","nameType":"Personal","givenName":"W. N.","familyName":"Johnson","affiliation":[]},{"name":"Johnston, S.","nameType":"Personal","givenName":"S.","familyName":"Johnston","affiliation":[]},{"name":"Kamae, T.","nameType":"Personal","givenName":"T.","familyName":"Kamae","affiliation":[]},{"name":"Kataoka, J.","nameType":"Personal","givenName":"J.","familyName":"Kataoka","affiliation":[]},{"name":"Keith, M.","nameType":"Personal","givenName":"M.","familyName":"Keith","affiliation":[]},{"name":"Kerr, M.","nameType":"Personal","givenName":"M.","familyName":"Kerr","affiliation":[]},{"name":"Knödlseder, J.","nameType":"Personal","givenName":"J.","familyName":"Knödlseder","affiliation":[]},{"name":"Kramer, M.","nameType":"Personal","givenName":"M.","familyName":"Kramer","affiliation":[]},{"name":"Kuss, M.","nameType":"Personal","givenName":"M.","familyName":"Kuss","affiliation":[]},{"name":"Lande, J.","nameType":"Personal","givenName":"J.","familyName":"Lande","affiliation":[]},{"name":"Larsson, S.","nameType":"Personal","givenName":"S.","familyName":"Larsson","affiliation":[]},{"name":"Latronico, L.","nameType":"Personal","givenName":"L.","familyName":"Latronico","affiliation":[]},{"name":"Lemoine-Goumard, M.","nameType":"Personal","givenName":"M.","familyName":"Lemoine-Goumard","affiliation":[]},{"name":"Longo, F.","nameType":"Personal","givenName":"F.","familyName":"Longo","affiliation":[]},{"name":"Loparco, F.","nameType":"Personal","givenName":"F.","familyName":"Loparco","affiliation":[]},{"name":"Lovellette, M. N.","nameType":"Personal","givenName":"M. N.","familyName":"Lovellette","affiliation":[]},{"name":"Lubrano, P.","nameType":"Personal","givenName":"P.","familyName":"Lubrano","affiliation":[]},{"name":"Lyne, A. G.","nameType":"Personal","givenName":"A. 
G.","familyName":"Lyne","affiliation":[]},{"name":"Manchester, R. N.","nameType":"Personal","givenName":"R. N.","familyName":"Manchester","affiliation":[]},{"name":"Marelli, M.","nameType":"Personal","givenName":"M.","familyName":"Marelli","affiliation":[]},{"name":"Massaro, F.","nameType":"Personal","givenName":"F.","familyName":"Massaro","affiliation":[]},{"name":"Mayer, Michael","nameType":"Personal","givenName":"Michael","familyName":"Mayer","affiliation":["Deutsches Elektronen-Synchrotron"]},{"name":"Mazziotta, M. N.","nameType":"Personal","givenName":"M. N.","familyName":"Mazziotta","affiliation":[]},{"name":"McEnery, J. E.","nameType":"Personal","givenName":"J. E.","familyName":"McEnery","affiliation":[]},{"name":"McLaughlin, M. A.","nameType":"Personal","givenName":"M. A.","familyName":"McLaughlin","affiliation":[]},{"name":"Mehault, J.","nameType":"Personal","givenName":"J.","familyName":"Mehault","affiliation":[]},{"name":"Michelson, P. F.","nameType":"Personal","givenName":"P. F.","familyName":"Michelson","affiliation":[]},{"name":"Mignani, R. P.","nameType":"Personal","givenName":"R. P.","familyName":"Mignani","affiliation":[]},{"name":"Mitthumsiri, W.","nameType":"Personal","givenName":"W.","familyName":"Mitthumsiri","affiliation":[]},{"name":"Mizuno, T.","nameType":"Personal","givenName":"T.","familyName":"Mizuno","affiliation":[]},{"name":"Moiseev, A. A.","nameType":"Personal","givenName":"A. A.","familyName":"Moiseev","affiliation":[]},{"name":"Monzani, M. E.","nameType":"Personal","givenName":"M. E.","familyName":"Monzani","affiliation":[]},{"name":"Morselli, A.","nameType":"Personal","givenName":"A.","familyName":"Morselli","affiliation":[]},{"name":"Moskalenko, I. V.","nameType":"Personal","givenName":"I. 
V.","familyName":"Moskalenko","affiliation":[]},{"name":"Murgia, S.","nameType":"Personal","givenName":"S.","familyName":"Murgia","affiliation":[]},{"name":"Nakamori, T.","nameType":"Personal","givenName":"T.","familyName":"Nakamori","affiliation":[]},{"name":"Nemmen, R.","nameType":"Personal","givenName":"R.","familyName":"Nemmen","affiliation":[]},{"name":"Nuss, E.","nameType":"Personal","givenName":"E.","familyName":"Nuss","affiliation":[]},{"name":"Ohno, M.","nameType":"Personal","givenName":"M.","familyName":"Ohno","affiliation":[]},{"name":"Ohsugi, T.","nameType":"Personal","givenName":"T.","familyName":"Ohsugi","affiliation":[]},{"name":"Orienti, M.","nameType":"Personal","givenName":"M.","familyName":"Orienti","affiliation":[]},{"name":"Orlando, E.","nameType":"Personal","givenName":"E.","familyName":"Orlando","affiliation":[]},{"name":"Ormes, J. F.","nameType":"Personal","givenName":"J. F.","familyName":"Ormes","affiliation":[]},{"name":"Paneque, D.","nameType":"Personal","givenName":"D.","familyName":"Paneque","affiliation":[]},{"name":"Panetta, J. H.","nameType":"Personal","givenName":"J. H.","familyName":"Panetta","affiliation":[]},{"name":"Parent, D.","nameType":"Personal","givenName":"D.","familyName":"Parent","affiliation":[]},{"name":"Perkins, J. S.","nameType":"Personal","givenName":"J. S.","familyName":"Perkins","affiliation":[]},{"name":"Pesce-Rollins, M.","nameType":"Personal","givenName":"M.","familyName":"Pesce-Rollins","affiliation":[]},{"name":"Pierbattista, M.","nameType":"Personal","givenName":"M.","familyName":"Pierbattista","affiliation":[]},{"name":"Piron, F.","nameType":"Personal","givenName":"F.","familyName":"Piron","affiliation":[]},{"name":"Pivato, G.","nameType":"Personal","givenName":"G.","familyName":"Pivato","affiliation":[]},{"name":"Pletsch, H. J.","nameType":"Personal","givenName":"H. J.","familyName":"Pletsch","affiliation":[]},{"name":"Porter, T. A.","nameType":"Personal","givenName":"T. 
A.","familyName":"Porter","affiliation":[]},{"name":"Possenti, A.","nameType":"Personal","givenName":"A.","familyName":"Possenti","affiliation":[]},{"name":"Rainò, S.","nameType":"Personal","givenName":"S.","familyName":"Rainò","affiliation":[]},{"name":"Rando, R.","nameType":"Personal","givenName":"R.","familyName":"Rando","affiliation":[]},{"name":"Ransom, S. M.","nameType":"Personal","givenName":"S. M.","familyName":"Ransom","affiliation":[]},{"name":"Ray, P. S.","nameType":"Personal","givenName":"P. S.","familyName":"Ray","affiliation":[]},{"name":"Razzano, M.","nameType":"Personal","givenName":"M.","familyName":"Razzano","affiliation":[]},{"name":"Rea, N.","nameType":"Personal","givenName":"N.","familyName":"Rea","affiliation":[]},{"name":"Reimer, A.","nameType":"Personal","givenName":"A.","familyName":"Reimer","affiliation":[]},{"name":"Reimer, O.","nameType":"Personal","givenName":"O.","familyName":"Reimer","affiliation":[]},{"name":"Renault, N.","nameType":"Personal","givenName":"N.","familyName":"Renault","affiliation":[]},{"name":"Reposeur, T.","nameType":"Personal","givenName":"T.","familyName":"Reposeur","affiliation":[]},{"name":"Ritz, S.","nameType":"Personal","givenName":"S.","familyName":"Ritz","affiliation":[]},{"name":"Romani, R. W.","nameType":"Personal","givenName":"R. W.","familyName":"Romani","affiliation":[]},{"name":"Roth, M.","nameType":"Personal","givenName":"M.","familyName":"Roth","affiliation":[]},{"name":"Rousseau, R.","nameType":"Personal","givenName":"R.","familyName":"Rousseau","affiliation":[]},{"name":"Roy, J.","nameType":"Personal","givenName":"J.","familyName":"Roy","affiliation":[]},{"name":"Ruan, J.","nameType":"Personal","givenName":"J.","familyName":"Ruan","affiliation":[]},{"name":"Sartori, A.","nameType":"Personal","givenName":"A.","familyName":"Sartori","affiliation":[]},{"name":"Saz Parkinson, P. M.","nameType":"Personal","givenName":"P. M.","familyName":"Saz Parkinson","affiliation":[]},{"name":"Scargle, J. 
D.","nameType":"Personal","givenName":"J. D.","familyName":"Scargle","affiliation":[]},{"name":"Schulz, A.","nameType":"Personal","givenName":"A.","familyName":"Schulz","affiliation":["Deutsches Elektronen-Synchrotron"]},{"name":"Sgrò, C.","nameType":"Personal","givenName":"C.","familyName":"Sgrò","affiliation":[]},{"name":"Shannon, R.","nameType":"Personal","givenName":"R.","familyName":"Shannon","affiliation":[]},{"name":"Siskind, E. J.","nameType":"Personal","givenName":"E. J.","familyName":"Siskind","affiliation":[]},{"name":"Smith, D. A.","nameType":"Personal","givenName":"D. A.","familyName":"Smith","affiliation":[]},{"name":"Spandre, G.","nameType":"Personal","givenName":"G.","familyName":"Spandre","affiliation":[]},{"name":"Spinelli, P.","nameType":"Personal","givenName":"P.","familyName":"Spinelli","affiliation":[]},{"name":"Stappers, B. W.","nameType":"Personal","givenName":"B. W.","familyName":"Stappers","affiliation":[]},{"name":"Strong, A. W.","nameType":"Personal","givenName":"A. W.","familyName":"Strong","affiliation":[]},{"name":"Suson, D. J.","nameType":"Personal","givenName":"D. J.","familyName":"Suson","affiliation":[]},{"name":"Takahashi, H.","nameType":"Personal","givenName":"H.","familyName":"Takahashi","affiliation":[]},{"name":"Thayer, J. G.","nameType":"Personal","givenName":"J. G.","familyName":"Thayer","affiliation":[]},{"name":"Thayer, J. B.","nameType":"Personal","givenName":"J. B.","familyName":"Thayer","affiliation":[]},{"name":"Theureau, G.","nameType":"Personal","givenName":"G.","familyName":"Theureau","affiliation":[]},{"name":"Thompson, D. J.","nameType":"Personal","givenName":"D. J.","familyName":"Thompson","affiliation":[]},{"name":"Thorsett, S. E.","nameType":"Personal","givenName":"S. 
E.","familyName":"Thorsett","affiliation":[]},{"name":"Tibaldo, L.","nameType":"Personal","givenName":"L.","familyName":"Tibaldo","affiliation":[]},{"name":"Tibolla, O.","nameType":"Personal","givenName":"O.","familyName":"Tibolla","affiliation":[]},{"name":"Tinivella, M.","nameType":"Personal","givenName":"M.","familyName":"Tinivella","affiliation":[]},{"name":"Torres, D. F.","nameType":"Personal","givenName":"D. F.","familyName":"Torres","affiliation":[]},{"name":"Tosti, G.","nameType":"Personal","givenName":"G.","familyName":"Tosti","affiliation":[]},{"name":"Troja, E.","nameType":"Personal","givenName":"E.","familyName":"Troja","affiliation":[]},{"name":"Uchiyama, Y.","nameType":"Personal","givenName":"Y.","familyName":"Uchiyama","affiliation":[]},{"name":"Usher, T. L.","nameType":"Personal","givenName":"T. L.","familyName":"Usher","affiliation":[]},{"name":"Vandenbroucke, J.","nameType":"Personal","givenName":"J.","familyName":"Vandenbroucke","affiliation":[]},{"name":"Vasileiou, V.","nameType":"Personal","givenName":"V.","familyName":"Vasileiou","affiliation":[]},{"name":"Venter, C.","nameType":"Personal","givenName":"C.","familyName":"Venter","affiliation":[]},{"name":"Vianello, G.","nameType":"Personal","givenName":"G.","familyName":"Vianello","affiliation":[]},{"name":"Vitale, V.","nameType":"Personal","givenName":"V.","familyName":"Vitale","affiliation":[]},{"name":"Wang, N.","nameType":"Personal","givenName":"N.","familyName":"Wang","affiliation":[]},{"name":"Weltevrede, P.","nameType":"Personal","givenName":"P.","familyName":"Weltevrede","affiliation":[]},{"name":"Winer, B. L.","nameType":"Personal","givenName":"B. L.","familyName":"Winer","affiliation":[]},{"name":"Wolff, M. T.","nameType":"Personal","givenName":"M. T.","familyName":"Wolff","affiliation":[]},{"name":"Wood, D. L.","nameType":"Personal","givenName":"D. L.","familyName":"Wood","affiliation":[]},{"name":"Wood, K. S.","nameType":"Personal","givenName":"K. 
S.","familyName":"Wood","affiliation":[]},{"name":"Wood, M.","nameType":"Personal","givenName":"M.","familyName":"Wood","affiliation":[]},{"name":"Yang, Z.","nameType":"Personal","givenName":"Z.","familyName":"Yang","affiliation":[]}],"titles":[{"title":"THE SECOND FERMI LARGE AREA TELESCOPE CATALOG OF GAMMA-RAY PULSARS"}],"publisher":"Deutsches Elektronen-Synchrotron, DESY, Hamburg","container":{"type":"Series","title":"The astrophysical journal / Supplement series 208(2)","lastPage":"59 (2013). doi:10.1088/0067","firstPage":"1","identifier":"0067-0049","identifierType":"ISSN"},"publicationYear":2013,"subjects":[{"subject":"520","schemeUri":"http://dewey.info/","subjectScheme":"dewey"}],"contributors":[],"dates":[{"date":"2013","dateType":"Copyrighted"},{"date":"2013","dateType":"Issued"}],"language":"en","types":{"ris":"JOUR","bibtex":"article","citeproc":"article-journal","schemaOrg":"ScholarlyArticle","resourceType":"Journal article","resourceTypeGeneral":"Text"},"relatedIdentifiers":[{"relationType":"IsVariantFormOf","relatedIdentifier":"10.1088/0067-0049/208/2/17","relatedIdentifierType":"DOI"},{"relationType":"IsPartOf","relatedIdentifier":"0067-0049","relatedIdentifierType":"ISSN"},{"relationType":"IsPartOf","relatedIdentifier":"1538-4365","relatedIdentifierType":"ISSN"}],"sizes":["pages 1-59"],"formats":[],"version":null,"rightsList":[],"descriptions":[{"description":"The astrophysical journal / Supplement series 208(2), 1-59 (2013). 
doi:10.1088/0067-0049/208/2/17","descriptionType":"SeriesInformation"},{"description":"Published by Institute of Physics Publ., London","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"url":"http://bib-pubdb1.desy.de/record/165916","contentUrl":null,"metadataVersion":0,"schemaVersion":"http://datacite.org/schema/kernel-3","source":null,"isActive":true,"state":"findable","reason":null,"created":"2018-06-28T09:19:08.000Z","registered":"2018-06-28T09:19:09.000Z","published":"2013","updated":"2019-02-03T02:31:46.000Z"},"relationships":{"client":{"data":{"id":"tib.desy","type":"clients"}}}}],"included":[{"id":"tib.desy","type":"clients","attributes":{"name":"DESY - Deutsches Elektronen-Synchrotron","symbol":"TIB.DESY","year":2011,"contactEmail":"Kirsten.Sachs@desy.de","alternateName":null,"description":null,"language":[],"clientType":"repository","domains":"desy.de,cern.ch","re3data":null,"opendoar":null,"issn":null,"url":null,"created":"2011-08-12T09:39:17.000Z","updated":"2018-08-26T02:35:08.000Z","isActive":true,"hasPassword":true},"relationships":{"provider":{"data":{"id":"tib","type":"providers"}},"prefixes":{"data":[{"id":"10.3204","type":"prefixes"}]}}}],"meta":{"total":1,"totalPages":1,"states":[{"id":"findable","title":"Findable","count":1}],"resourceTypes":[{"id":"text","title":"Text","count":1}],"created":[{"id":"2018","title":"2018","count":1}],"registered":[{"id":"2018","title":"2018","count":1}],"providers":[{"id":"tib","title":"German National Library of Science and Technology","count":1}],"clients":[{"id":"tib.desy","title":"DESY - Deutsches Elektronen-Synchrotron","count":1}],"affiliations":[],"prefixes":[{"id":"10.3204","title":"10.3204","count":1}],"certificates":[],"schemaVersions":[{"id":"3","title":"Schema 
3","count":1}],"sources":[],"linkChecksStatus":[],"linksChecked":0,"linksWithSchemaOrg":[{"id":"0","title":"0","count":1}],"linkChecksSchemaOrgId":0,"linkChecksDcIdentifier":0,"linkChecksCitationDoi":0,"subjects":[{"id":"520","title":"520","count":1}]},"links":{"self":"https://api.datacite.org/dois?page%5Bcursor%5D=1&page%5Bsize%5D=50&query=updated%3A%5B2019-02-03T00%3A00%3A00.000Z+TO+2019-02-03T23%3A59%3A59.999Z%5D"}} \ No newline at end of file
diff --git a/python/tests/harvest_datacite.py b/python/tests/harvest_datacite.py
new file mode 100644
index 00000000..926d67ba
--- /dev/null
+++ b/python/tests/harvest_datacite.py
@@ -0,0 +1,45 @@
+
+import json
+import pytest
+import datetime
+import responses
+from fatcat_tools.harvest import *
+
+
+@responses.activate
+def test_datacite_harvest_date(mocker):
+    """Harvest one day from a mocked Datacite API; check the request and Kafka calls."""
+    # mock out the harvest state object so it doesn't try to actually connect
+    # to Kafka
+    mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+
+    # mock day request to datacite API
+    with open('tests/files/datacite_api.json', 'r') as f:
+        datacite_resp = json.load(f)
+    responses.add(responses.GET, 'https://api.datacite.org/dois',
+                  json=datacite_resp, status=200)
+
+    harvester = HarvestDataciteWorker(
+        kafka_hosts="dummy",
+        produce_topic="dummy-produce-topic",
+        state_topic="dummy-state-topic",
+        contact_email="test@fatcat.wiki",
+    )
+
+    harvester.producer = mocker.Mock()
+
+    harvester.fetch_date(datetime.date(2019, 2, 3))
+
+    assert len(responses.calls) == 1
+
+    # ensure email was included in User-Agent
+    assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers['User-Agent']
+
+    # check that correct date param was passed as expected
+    assert "query=updated%3A%5B2019-02-03T00%3A00%3A00.000Z+TO+2019-02-03T23%3A59%3A59.999Z%5D" in responses.calls[0].request.url
+
+    # check the expected number of DOI objects reached the (mock) kafka topic;
+    # NOTE(review): assumes fetch_date polls exactly once for one short batch
+    assert harvester.producer.produce.call_count == 1
+    assert harvester.producer.flush.call_count == 1
+    harvester.producer.poll.assert_called_once_with(0)