summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin@archive.org>2019-12-09 19:41:09 +0000
committerMartin Czygan <martin@archive.org>2019-12-09 19:41:09 +0000
commitd6bc26046bbbe7bd76f4740b12170b1b73e6d264 (patch)
tree6a9a7803bd955740b88e82d5687d29111d2ffa6e
parente5a1738b67c098ad61257c4b872ecdb3f6ad74a8 (diff)
parent7cba221ba4876bd7c011c6a46dc86c4494218366 (diff)
downloadfatcat-d6bc26046bbbe7bd76f4740b12170b1b73e6d264.tar.gz
fatcat-d6bc26046bbbe7bd76f4740b12170b1b73e6d264.zip
Merge branch 'bnewbold-crossref-harvest-test' into 'master'
Basic mocked test for crossref harvester See merge request webgroup/fatcat!7
-rw-r--r--python/Pipfile1
-rw-r--r--python/Pipfile.lock10
-rw-r--r--python/fatcat_tools/harvest/doi_registrars.py47
-rw-r--r--python/tests/files/crossref_api_works.json1
-rw-r--r--python/tests/harvest_crossref.py45
5 files changed, 82 insertions, 22 deletions
diff --git a/python/Pipfile b/python/Pipfile
index 0f5ee2c0..1c15cab2 100644
--- a/python/Pipfile
+++ b/python/Pipfile
@@ -16,6 +16,7 @@ responses = ">=0.10"
pytest-cov = "*"
pylint = "*"
pg-view = "*"
+pytest-mock = "*"
[packages]
fatcat-openapi-client = {path = "./../python_openapi_client"}
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
index f75b7fbe..5a8ef462 100644
--- a/python/Pipfile.lock
+++ b/python/Pipfile.lock
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
- "sha256": "b3d17cd8c87abdbb852248ec8eef83b5992df46234e7a89e7bd29a6c4d693909"
+ "sha256": "e2e05ace1d00d2859f8942ac21bbb7fcbde9dc2f28df74f4173b3dfd7c7d3932"
},
"pipfile-spec": 6,
"requires": {
@@ -800,6 +800,14 @@
"index": "pypi",
"version": "==2.7.1"
},
+ "pytest-mock": {
+ "hashes": [
+ "sha256:67e414b3caef7bff6fc6bd83b22b5bc39147e4493f483c2679bc9d4dc485a94d",
+ "sha256:e24a911ec96773022ebcc7030059b57cd3480b56d4f5d19b7c370ec635e6aed5"
+ ],
+ "index": "pypi",
+ "version": "==1.13.0"
+ },
"pytest-pylint": {
"hashes": [
"sha256:0dcb6cd429281f7493994e65324937bb295ef28f7632c046b45828380f4fe88a",
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 2df13283..13abb2e6 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -63,6 +63,27 @@ class HarvestCrossrefWorker:
self.loop_sleep = 60*60 # how long to wait, in seconds, between date checks
self.api_batch_size = 50
self.name = "Crossref"
+ self.producer = self._kafka_producer()
+
+ def _kafka_producer(self):
+ """
+ Create the Kafka producer used to publish harvested records.
+
+ Also stores the delivery callback on self._kafka_fail_fast so that it
+ can be passed as on_delivery when producing messages.
+ """
+
+ def fail_fast(err, msg):
+ # delivery callback: any reported error is printed and then raised
+ # as a KafkaException
+ if err is not None:
+ print("Kafka producer delivery error: {}".format(err))
+ print("Bailing out...")
+ # TODO: should it be sys.exit(-1)?
+ raise KafkaException(err)
+
+ self._kafka_fail_fast = fail_fast
+
+ # apply the producer-specific settings to a copy rather than to
+ # self.kafka_config itself
+ producer_conf = self.kafka_config.copy()
+ producer_conf.update({
+ 'delivery.report.only.error': True,
+ 'default.topic.config': {
+ 'request.required.acks': -1, # all brokers must confirm
+ },
+ })
+ return Producer(producer_conf)
def params(self, date_str):
filter_param = 'from-index-date:{},until-index-date:{}'.format(
@@ -82,22 +103,6 @@ class HarvestCrossrefWorker:
def fetch_date(self, date):
- def fail_fast(err, msg):
- if err is not None:
- print("Kafka producer delivery error: {}".format(err))
- print("Bailing out...")
- # TODO: should it be sys.exit(-1)?
- raise KafkaException(err)
-
- producer_conf = self.kafka_config.copy()
- producer_conf.update({
- 'delivery.report.only.error': True,
- 'default.topic.config': {
- 'request.required.acks': -1, # all brokers must confirm
- },
- })
- producer = Producer(producer_conf)
-
date_str = date.isoformat()
params = self.params(date_str)
http_session = requests_retry_session()
@@ -113,7 +118,7 @@ class HarvestCrossrefWorker:
# backoff, but allows for longer backoff/downtime on remote end
print("got HTTP {}, pausing for 30 seconds".format(http_resp.status_code))
# keep kafka producer connection alive
- producer.poll(0)
+ self.producer.poll(0)
time.sleep(30.0)
continue
http_resp.raise_for_status()
@@ -124,16 +129,16 @@ class HarvestCrossrefWorker:
self.extract_total(resp), http_resp.elapsed))
#print(json.dumps(resp))
for work in items:
- producer.produce(
+ self.producer.produce(
self.produce_topic,
json.dumps(work).encode('utf-8'),
key=self.extract_key(work),
- on_delivery=fail_fast)
- producer.poll(0)
+ on_delivery=self._kafka_fail_fast)
+ self.producer.poll(0)
if len(items) < self.api_batch_size:
break
params = self.update_params(params, resp)
- producer.flush()
+ self.producer.flush()
def extract_items(self, resp):
return resp['message']['items']
diff --git a/python/tests/files/crossref_api_works.json b/python/tests/files/crossref_api_works.json
new file mode 100644
index 00000000..c1ffccb3
--- /dev/null
+++ b/python/tests/files/crossref_api_works.json
@@ -0,0 +1 @@
+{"status":"ok","message-type":"work-list","message-version":"1.0.0","message":{"facets":{},"total-results":2473850,"items":[{"indexed":{"date-parts":[[2019,12,4]],"date-time":"2019-12-04T07:47:00Z","timestamp":1575445620667},"reference-count":0,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IT Prof."],"published-print":{"date-parts":[[2002,9]]},"DOI":"10.1109\/mitp.2002.1041177","type":"journal-article","created":{"date-parts":[[2002,11,21]],"date-time":"2002-11-21T20:12:04Z","timestamp":1037909524000},"page":"37-41","source":"Crossref","is-referenced-by-count":12,"title":["Inside SSL: accelerating secure transactions"],"prefix":"10.1109","volume":"4","author":[{"given":"W.","family":"Chou","sequence":"first","affiliation":[]}],"member":"263","container-title":["IT Professional"],"language":"en","link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6294\/22323\/01041177.pdf?arnumber=1041177","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,10]],"date-time":"2017-03-10T21:08:49Z","timestamp":1489180129000},"score":1.0,"issued":{"date-parts":[[2002,9]]},"references-count":0,"journal-issue":{"published-print":{"date-parts":[[2002,9]]},"issue":"5"},"URL":"http:\/\/dx.doi.org\/10.1109\/mitp.2002.1041177","ISSN":["1520-9202"],"issn-type":[{"value":"1520-9202","type":"print"}]},{"indexed":{"date-parts":[[2019,12,4]],"date-time":"2019-12-04T07:47:00Z","timestamp":1575445620667},"reference-count":21,"publisher":"IEEE","isbn-type":[{"value":"078039464X","type":"print"}],"content-domain":{"domain":[],"crossmark-restriction":false},"DOI":"10.1109\/infvis.2005.1532150","type":"proceedings-article","created":{"date-parts":[[2005,11,15]],"date-time":"2005-11-15T15:48:27Z","timestamp":1132069707000},"source":"Crossref","is-referenced-by-count":17,"title":["Flow map 
layout"],"prefix":"10.1109","author":[{"family":"Doantam Phan","sequence":"first","affiliation":[]},{"family":"Ling Xiao","sequence":"additional","affiliation":[]},{"given":"R.","family":"Yeh","sequence":"additional","affiliation":[]},{"given":"P.","family":"Hanrahan","sequence":"additional","affiliation":[]},{"given":"T.","family":"Winograd","sequence":"additional","affiliation":[]}],"member":"263","event":{"name":"IEEE Symposium on Information Visualization, 2005. INFOVIS 2005.","location":"Minneapolis, MN, USA"},"container-title":["IEEE Symposium on Information Visualization, 2005. INFOVIS 2005."],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/10260\/32681\/01532150.pdf?arnumber=1532150","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,14]],"date-time":"2017-03-14T16:09:43Z","timestamp":1489507783000},"score":1.0,"issued":{"date-parts":[[null]]},"ISBN":["078039464X"],"references-count":21,"URL":"http:\/\/dx.doi.org\/10.1109\/infvis.2005.1532150"},{"indexed":{"date-parts":[[2019,12,4]],"date-time":"2019-12-04T07:47:00Z","timestamp":1575445620667},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"URL":"http:\/\/www.springer.com\/tdm","start":{"date-parts":[[2009,9,23]],"date-time":"2009-09-23T00:00:00Z","timestamp":1253664000000},"delay-in-days":0,"content-version":"tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Math. 
Program."],"published-print":{"date-parts":[[2011,6]]},"DOI":"10.1007\/s10107-009-0306-5","type":"journal-article","created":{"date-parts":[[2009,9,22]],"date-time":"2009-09-22T12:28:12Z","timestamp":1253622492000},"page":"321-353","source":"Crossref","is-referenced-by-count":426,"title":["Fixed point and Bregman iterative methods for matrix rank minimization"],"prefix":"10.1007","volume":"128","author":[{"given":"Shiqian","family":"Ma","sequence":"first","affiliation":[]},{"given":"Donald","family":"Goldfarb","sequence":"additional","affiliation":[]},{"given":"Lifeng","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,9,23]]},"reference":[{"issue":"Jun","key":"306_CR1","first-page":"1019","volume":"9","author":"F.R. Bach","year":"2008","unstructured":"Bach F.R.: Consistency of trace norm minimization. J. Mach. Learn. Res. 9(Jun), 1019\u20131048 (2008)","journal-title":"J. Mach. Learn. Res."},{"key":"306_CR2","unstructured":"Bertalm\u00edo, M., Sapiro, G., Caselles, V., Ballester, C.: Image inpainting. In: Proceedings of SIGGRAPH 2000, New Orleans, USA (2000)","DOI":"10.1145\/344779.344972","doi-asserted-by":"crossref"},{"key":"306_CR3","author":"J.M. Borwein","year":"2003","unstructured":"Borwein J.M., Lewis A.S.: Convex Analysis and Nonlinear Optimization. Springer, New York (2003)","volume-title":"Convex Analysis and Nonlinear Optimization"},{"key":"306_CR4","doi-asserted-by":"crossref","first-page":"200","DOI":"10.1016\/0041-5553(67)90040-7","volume":"7","author":"L. Bregman","year":"1967","unstructured":"Bregman L.: The relaxation method of finding the common points of convex sets and its application to the solution of problems in convex programming. USSR Comput. Math. Math. Phys. 7, 200\u2013217 (1967)","journal-title":"USSR Comput. Math. Math. Phys."},{"key":"306_CR5","doi-asserted-by":"crossref","first-page":"329","DOI":"10.1007\/s10107-002-0352-8","volume":"95","author":"S. 
Burer","year":"2003","unstructured":"Burer S., Monteiro R.D.C.: A nonlinear programming algorithm for solving semidefinite programs via low-rank factorization. Math. Program. (Ser. B) 95, 329\u2013357 (2003)","journal-title":"Math. Program. (Ser. B)"},{"issue":"3","key":"306_CR6","doi-asserted-by":"crossref","first-page":"427","DOI":"10.1007\/s10107-004-0564-1","volume":"103","author":"S. Burer","year":"2005","unstructured":"Burer S., Monteiro R.D.C.: Local mimima and convergence in low-rank semidefinite programming. Math. Program. 103(3), 427\u2013444 (2005)","journal-title":"Math. Program."},{"key":"306_CR7","unstructured":"Cai, J., Cand\u00e8s, E.J., Shen, Z.: A singular value thresholding algorithm for matrix completion. Preprint available at http:\/\/arxiv.org\/abs\/0810.3286 (2008)"},{"key":"306_CR8","unstructured":"Cand\u00e8s, E.J., Recht, B.: Exact matrix completion via convex optimization. Found. Comput. Math. (2009)","DOI":"10.1007\/s10208-009-9045-5","doi-asserted-by":"crossref"},{"key":"306_CR9","unstructured":"Cand\u00e8s, E.J., Romberg, J.: \u2113 1-MAGIC: recovery of sparse signals via convex programming. Technical Report, Caltech (2005)"},{"key":"306_CR10","doi-asserted-by":"crossref","first-page":"489","DOI":"10.1109\/TIT.2005.862083","volume":"52","author":"E.J. Cand\u00e8s","year":"2006","unstructured":"Cand\u00e8s E.J., Romberg J., Tao T.: Robust uncertainty principles: exact signal reconstruction from highly incomplete frequency information. IEEE Trans. Inf. Theory 52, 489\u2013509 (2006)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"306_CR11","unstructured":"Cand\u00e8s, E.J., Tao, T.: The power of convex relaxation: near-optimal matrix completion. Preprint available at http:\/\/arxiv.org\/abs\/0903.1476 (2009)"},{"key":"306_CR12","unstructured":"Dai, W., Milenkovic, O.: Subspace pursuit for compressive sensing: closing the gap between performance and complexity. 
Preprint available at arXiv: 0803.0811 (2008)"},{"key":"306_CR13","doi-asserted-by":"crossref","first-page":"1289","DOI":"10.1109\/TIT.2006.871582","volume":"52","author":"D. Donoho","year":"2006","unstructured":"Donoho D.: Compressed sensing. IEEE Trans. Inf. Theory 52, 1289\u20131306 (2006)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"306_CR14","unstructured":"Donoho, D.L., Tsaig, Y.: Fast solution of \u2113 1-norm minimization problems when the solution may be sparse. Technical Report, Department of Statistics, Stanford University (2006)"},{"key":"306_CR15","unstructured":"Donoho, D., Tsaig, Y., Drori, I., Starck, J.C.: Sparse solution of underdetermined linear equations by stagewise orthogonal matching pursuit. IEEE Trans. Inf. Theory (2006) (submitted)"},{"key":"306_CR16","doi-asserted-by":"crossref","first-page":"158","DOI":"10.1137\/S0097539704442696","volume":"36","author":"P. Drineas","year":"2006","unstructured":"Drineas P., Kannan R., Mahoney M.W.: Fast Monte Carlo algorithms for matrices ii: computing low-rank approximations to a matrix. SIAM J. Comput. 36, 158\u2013183 (2006)","journal-title":"SIAM J. Comput."},{"key":"306_CR17","unstructured":"Fazel, M.: Matrix rank minimization with applications. Ph.D. thesis, Stanford University (2002)"},{"key":"306_CR18","unstructured":"Fazel, M., Hindi, H., Boyd, S.: A rank minimization heuristic with application to minimum order system approximation. In: Proceedings of the American Control Conference, vol. 6, pp. 4734\u20134739 (2001)"},{"key":"306_CR19","unstructured":"Figueiredo, M.A.T., Nowak, R.D., Wright, S.J.: Gradient projection for sparse reconstruction: application to compressed sensing and other inverse problems. IEEE J. Sel. Top. Signal Process. 1(4) (2007)","DOI":"10.1109\/JSTSP.2007.910281","doi-asserted-by":"crossref"},{"key":"306_CR20","unstructured":"Ghaoui, L.E., Gahinet, P.: Rank minimization under LMI constraints: a framework for output feedback problems. 
In: Proceedings of the European Control Conference (1993)"},{"issue":"2","key":"306_CR21","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1023\/A:1011419012209","volume":"4","author":"K. Goldberg","year":"2001","unstructured":"Goldberg K., Roeder T., Gupta D., Perkins C.: Eigentaste: a constant time collaborative filtering algorithm. Inf. Retr. 4(2), 133\u2013151 (2001)","journal-title":"Inf. Retr."},{"key":"306_CR22","unstructured":"Goldfarb, D., Ma, S.: Convergence of fixed point continuation algorithms for matrix rank minimization. Technical Report, Department of IEOR, Columbia University (2009)"},{"key":"306_CR23","unstructured":"Hale, E.T., Yin, W., Zhang, Y.: A fixed-point continuation method for \u2113 1-regularized minimization with applications to compressed sensing. Technical Report, CAAM TR07-07 (2007)"},{"key":"306_CR24","author":"J.B. Hiriart-Urruty","year":"1993","unstructured":"Hiriart-Urruty J.B., Lemar\u00e9chal C.: Convex Analysis and Minimization Algorithms II: Advanced Theory and Bundle Methods. Springer, New York (1993)","volume-title":"Convex Analysis and Minimization Algorithms II: Advanced Theory and Bundle Methods","DOI":"10.1007\/978-3-662-06409-2","doi-asserted-by":"crossref"},{"key":"306_CR25","author":"R.A. Horn","year":"1985","unstructured":"Horn R.A., Johnson C.R.: Matrix Analysis. Cambridge University Press, Cambridge (1985)","volume-title":"Matrix Analysis","DOI":"10.1017\/CBO9780511810817","doi-asserted-by":"crossref"},{"key":"306_CR26","unstructured":"Keshavan, R.H., Montanari, A., Oh, S.: Matrix completion from a few entries. Preprint available at http:\/\/arxiv.org\/abs\/0901.3150 (2009)","DOI":"10.1109\/ISIT.2009.5205567","doi-asserted-by":"crossref"},{"issue":"1","key":"306_CR27","doi-asserted-by":"crossref","first-page":"606","DOI":"10.1109\/JSTSP.2007.910971","volume":"4","author":"S.J. 
Kim","year":"2007","unstructured":"Kim S.J., Koh K., Lustig M., Boyd S., Gorinevsky D.: A method for large-scale \u2113 1-regularized least-squares. IEEE J. Sel. Top. Signal Process. 4(1), 606\u2013617 (2007)","journal-title":"IEEE J. Sel. Top. Signal Process."},{"key":"306_CR28","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1007\/BF01200757","volume":"15","author":"N. Linial","year":"1995","unstructured":"Linial N., London E., Rabinovich Y.: The geometry of graphs and some of its algorithmic applications. Combinatorica 15, 215\u2013245 (1995)","journal-title":"Combinatorica"},{"key":"306_CR29","unstructured":"Liu, Z., Vandenberghe, L.: Interior-point method for nuclear norm approximation with application to system identification. Preprint available at http:\/\/www.ee.ucla.edu\/~vandenbe\/publications\/nucnrm.pdf (2008)"},{"issue":"2","key":"306_CR30","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1137\/S0097539792240406","volume":"24","author":"B.K. Natarajan","year":"1995","unstructured":"Natarajan B.K.: Sparse approximation solutions to linear systems. SIAM J. Comput. 24(2), 227\u2013234 (1995)","journal-title":"SIAM J. Comput."},{"issue":"2","key":"306_CR31","doi-asserted-by":"crossref","first-page":"460","DOI":"10.1137\/040605412","volume":"4","author":"S. Osher","year":"2005","unstructured":"Osher S., Burger M., Goldfarb D., Xu J., Yin W.: An iterative regularization method for total varitaion-based image restoration. SIAM MMS 4(2), 460\u2013489 (2005)","journal-title":"SIAM MMS"},{"key":"306_CR32","unstructured":"Recht, B., Fazel, M., Parrilo, P.: Guaranteed minimum rank solutions of matrix equations via nuclear norm minimization. Preprint available at http:\/\/arxiv.org\/abs\/0706.4138 (2007)"},{"key":"306_CR33","unstructured":"Rennie, J.D.M., Srebro, N.: Fast maximum margin matrix factorization for collaborative prediction. 
In: Proceedings of the International Conference of Machine Learning (2005)","DOI":"10.1145\/1102351.1102441","doi-asserted-by":"crossref"},{"key":"306_CR34","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1016\/0167-2789(92)90242-F","volume":"60","author":"L. Rudin","year":"1992","unstructured":"Rudin L., Osher S., Fatemi E.: Nonlinear total variation based noise removal algorithms. Physica D 60, 259\u2013268 (1992)","journal-title":"Physica D"},{"key":"306_CR35","doi-asserted-by":"crossref","first-page":"3273","DOI":"10.1091\/mbc.9.12.3273","volume":"9","author":"P.T. Spellman","year":"1998","unstructured":"Spellman P.T., Sherlock G., Zhang M.Q., Iyer V.R., Anders K., Eisen M.B., Brown P.O., Botstein D., Futcher B.: Comprehensive identification of cell cycle-regulated genes of the yeast saccharomyces cerevisiae by microarray hybridization. Mol. Biol. Cell 9, 3273\u20133297 (1998)","journal-title":"Mol. Biol. Cell"},{"key":"306_CR36","unstructured":"Srebro, N.: Learning with matrix factorizations. Ph.D. thesis, Massachusetts Institute of Technology (2004)"},{"key":"306_CR37","unstructured":"Srebro, N., Jaakkola, T.: Weighted low-rank approximations. In: Proceedings of the Twentieth International Conference on Machine Learning (ICML-2003) (2003)"},{"issue":"12","key":"306_CR38","doi-asserted-by":"crossref","first-page":"625","DOI":"10.1080\/10556789908805766","volume":"11","author":"J.F. Sturm","year":"1999","unstructured":"Sturm J.F.: Using SeDuMi 1.02, a Matlab toolbox for optimization over symmetric cones. Opt. Methods Softw. 11(12), 625\u2013653 (1999)","journal-title":"Opt. Methods Softw."},{"key":"306_CR39","first-page":"267","volume":"58","author":"R. Tibshirani","year":"1996","unstructured":"Tibshirani R.: Regression shrinkage and selection via the lasso. J. R. Stat. Soc. B 58, 267\u2013288 (1996)","journal-title":"J. R. Stat. Soc. 
B"},{"key":"306_CR40","doi-asserted-by":"crossref","first-page":"1030","DOI":"10.1109\/TIT.2005.864420","volume":"51","author":"J. Tropp","year":"2006","unstructured":"Tropp J.: Just relax: convex programming methods for identifying sparse signals. IEEE Trans. Inf. Theory 51, 1030\u20131051 (2006)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"6","key":"306_CR41","doi-asserted-by":"crossref","first-page":"520","DOI":"10.1093\/bioinformatics\/17.6.520","volume":"17","author":"O. Troyanskaya","year":"2001","unstructured":"Troyanskaya O., Cantor M., Sherlock G., Brown P., Hastie T., Tibshirani R., Botstein D., Altman R.B.: Missing value estimation methods for DNA microarrays. Bioinformatics 17(6), 520\u2013525 (2001)","journal-title":"Bioinformatics"},{"key":"306_CR42","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1007\/s10107-002-0347-5","volume":"95","author":"R.H. T\u00fct\u00fcnc\u00fc","year":"2003","unstructured":"T\u00fct\u00fcnc\u00fc R.H., Toh K.C., Todd M.J.: Solving semidefinite-quadratic-linear programs using SDPT3. Math. Program. Ser. B 95, 189\u2013217 (2003)","journal-title":"Math. Program. Ser. B"},{"issue":"2","key":"306_CR43","doi-asserted-by":"crossref","first-page":"890","DOI":"10.1137\/080714488","volume":"31","author":"E. Berg van den","year":"2008","unstructured":"van den Berg E., Friedlander M.P.: Probing the Pareto frontier for basis pursuit solutions. SIAM J. Sci. Comput. 31(2), 890\u2013912 (2008)","journal-title":"SIAM J. Sci. Comput."},{"key":"306_CR44","unstructured":"Wen, Z., Yin, W., Goldfarb, D., Zhang, Y.: A fast algorithm for sparse reconstruction based on shrinkage, subspace optimization and continuation. Technical Report, Department of IEOR, Columbia University (2009)"},{"issue":"1","key":"306_CR45","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1137\/070703983","volume":"1","author":"W. 
Yin","year":"2008","unstructured":"Yin W., Osher S., Goldfarb D., Darbon J.: Bregman iterative algorithms for \u2113 1-minimization with applications to compressed sensing. SIAM J. Imaging Sci. 1(1), 143\u2013168 (2008)","journal-title":"SIAM J. Imaging Sci."}],"container-title":["Mathematical Programming"],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-009-0306-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10107-009-0306-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-009-0306-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,29]],"date-time":"2019-05-29T09:50:07Z","timestamp":1559123407000},"score":1.0,"issued":{"date-parts":[[2009,9,23]]},"references-count":45,"journal-issue":{"published-print":{"date-parts":[[2011,6]]},"issue":"1-2"},"alternative-id":["306"],"URL":"http:\/\/dx.doi.org\/10.1007\/s10107-009-0306-5","relation":{"cites":[]},"ISSN":["0025-5610","1436-4646"],"issn-type":[{"value":"0025-5610","type":"print"},{"value":"1436-4646","type":"electronic"}],"subject":["Software","General Mathematics"]}],"items-per-page":3,"query":{"start-index":0,"search-terms":null}}} \ No newline at end of file
diff --git a/python/tests/harvest_crossref.py b/python/tests/harvest_crossref.py
new file mode 100644
index 00000000..52aa7b81
--- /dev/null
+++ b/python/tests/harvest_crossref.py
@@ -0,0 +1,45 @@
+
+import json
+import pytest
+import datetime
+import responses
+from fatcat_tools.harvest import *
+
+
+@responses.activate
+def test_crossref_harvest_date(mocker):
+    """Harvest one mocked Crossref day; check the HTTP request and the Kafka producer calls."""
+    # mock out the harvest state object so it doesn't try to actually connect
+    # to Kafka
+    mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+
+    # mock day request to crossref API
+    with open('tests/files/crossref_api_works.json', 'r') as f:
+        crossref_resp = json.load(f)
+    responses.add(responses.GET, 'https://api.crossref.org/works',
+        json=crossref_resp, status=200)
+
+    harvester = HarvestCrossrefWorker(
+        kafka_hosts="dummy",
+        produce_topic="dummy-produce-topic",
+        state_topic="dummy-state-topic",
+        contact_email="test@fatcat.wiki",
+    )
+
+    harvester.producer = mocker.Mock()
+
+    harvester.fetch_date(datetime.date(2019, 2, 3))
+
+    assert len(responses.calls) == 1
+
+    # ensure email was included in User-Agent
+    assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers['User-Agent']
+
+    # check that correct date param was passed as expected
+    assert "filter=from-index-date%3A2019-02-03" in responses.calls[0].request.url
+
+    # check that the expected number of DOI objects were published
+    # to the (mock) kafka topic
+    assert harvester.producer.produce.call_count == 3
+    assert harvester.producer.flush.call_count == 1
+    harvester.producer.poll.assert_called_with(0)  # real Mock assertion; checks the most recent poll() call