about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/fatcat_tools/harvest/doi_registrars.py | 15
1 files changed, 5 insertions, 10 deletions
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index e24a979d..5af5395e 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -214,20 +214,15 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
def update_params(self, params, resp):
"""
- We need to parse out the cursor value from the next link.
+ Using cursor mechanism (https://support.datacite.org/docs/pagination#section-cursor).
$ curl -sL https://is.gd/cLbE5h | jq -r .links.next
- https://api.datacite.org/dois?page%5Bcursor%5D=MTMxNjgwODE3NTAwMCwxMC41NDM5LzEwMjUxOTI&page%5Bsize%5D=50&query=updated%3A%5B2019-11-18T00%3A00%3A00.000Z+TO+2019-11-18T23%3A59%3A59.000Z%5D
+ Example: https://is.gd/cLbE5h
- Notes.
-
- (1) HTTP 400 issues.
-
- Funny "search_after has 3 value(s) but sort has 2." on
- https://api.datacite.org/dois?page%5Bsize%5D=50&page%5Bcursor%5D=MTQyMzQ2ODQwMTAwMCwxMC41Njc1L0hZV0FfMjAxNSwxXzI&query=updated%3A%5B2019-11-20T00%3A00%3A00.000Z+TO+2019-11-20T23%3A59%3A59.000Z%5D
-
- Reported as https://github.com/datacite/datacite/issues/897.
+ Further API errors reported:
+ https://github.com/datacite/datacite/issues/897 (HTTP 400)
+ https://github.com/datacite/datacite/issues/898 (HTTP 500)
"""
parsed = urlparse(resp['links']['next'])
page_cursor = parse_qs(parsed.query).get('page[cursor]')