authorMartin Czygan <martin.czygan@gmail.com>2019-12-06 15:12:21 +0100
committerMartin Czygan <martin.czygan@gmail.com>2019-12-27 00:13:40 +0100
commit27d79252aa60379c3dc45b4d6072b21a9f82b8c1 (patch)
tree8252de111cf42b5a8c5a510be38ef05faf9649f0 /python/fatcat_tools/harvest
parent5bdfc1518adb545f15d3ac053e831a13cb96c971 (diff)
datacite: update documentation, add links to issues
Diffstat (limited to 'python/fatcat_tools/harvest')
-rw-r--r--  python/fatcat_tools/harvest/doi_registrars.py  15
1 file changed, 5 insertions, 10 deletions
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index e24a979d..5af5395e 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -214,20 +214,15 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
def update_params(self, params, resp):
"""
- We need to parse out the cursor value from the next link.
+ Using cursor mechanism (https://support.datacite.org/docs/pagination#section-cursor).
$ curl -sL https://is.gd/cLbE5h | jq -r .links.next
- https://api.datacite.org/dois?page%5Bcursor%5D=MTMxNjgwODE3NTAwMCwxMC41NDM5LzEwMjUxOTI&page%5Bsize%5D=50&query=updated%3A%5B2019-11-18T00%3A00%3A00.000Z+TO+2019-11-18T23%3A59%3A59.000Z%5D
+ Example: https://is.gd/cLbE5h
- Notes.
-
- (1) HTTP 400 issues.
-
- Funny "search_after has 3 value(s) but sort has 2." on
- https://api.datacite.org/dois?page%5Bsize%5D=50&page%5Bcursor%5D=MTQyMzQ2ODQwMTAwMCwxMC41Njc1L0hZV0FfMjAxNSwxXzI&query=updated%3A%5B2019-11-20T00%3A00%3A00.000Z+TO+2019-11-20T23%3A59%3A59.000Z%5D
-
- Reported as https://github.com/datacite/datacite/issues/897.
+ Further API errors reported:
+ https://github.com/datacite/datacite/issues/897 (HTTP 400)
+ https://github.com/datacite/datacite/issues/898 (HTTP 500)
"""
parsed = urlparse(resp['links']['next'])
page_cursor = parse_qs(parsed.query).get('page[cursor]')
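
Below is a minimal, self-contained sketch of the cursor extraction performed at the end of the hunk above, assuming a DataCite-style JSON response whose links.next URL carries a page[cursor] query parameter. The helper name next_cursor and the example payload (with a shortened, hypothetical cursor value) are illustrative and not part of the commit.

from urllib.parse import urlparse, parse_qs

def next_cursor(resp):
    # Pull the next-page URL from the DataCite response, if present.
    next_url = resp.get('links', {}).get('next')
    if not next_url:
        return None
    # parse_qs percent-decodes the query, so page%5Bcursor%5D becomes page[cursor].
    values = parse_qs(urlparse(next_url).query).get('page[cursor]')
    return values[0] if values else None

# Hypothetical response with a shortened cursor value, for illustration only:
resp = {'links': {'next': 'https://api.datacite.org/dois?page%5Bcursor%5D=MTMxNjgw&page%5Bsize%5D=50'}}
print(next_cursor(resp))  # -> MTMxNjgw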