diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2019-12-06 15:12:21 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2019-12-27 00:13:40 +0100 |
commit | 27d79252aa60379c3dc45b4d6072b21a9f82b8c1 (patch) | |
tree | 8252de111cf42b5a8c5a510be38ef05faf9649f0 /python/fatcat_tools/harvest | |
parent | 5bdfc1518adb545f15d3ac053e831a13cb96c971 (diff) | |
download | fatcat-27d79252aa60379c3dc45b4d6072b21a9f82b8c1.tar.gz fatcat-27d79252aa60379c3dc45b4d6072b21a9f82b8c1.zip |
datacite: update documentation, add links to issues
Diffstat (limited to 'python/fatcat_tools/harvest')
-rw-r--r-- | python/fatcat_tools/harvest/doi_registrars.py | 15 |
1 file changed, 5 insertions, 10 deletions
```diff
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index e24a979d..5af5395e 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -214,20 +214,15 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
 
     def update_params(self, params, resp):
         """
-        We need to parse out the cursor value from the next link.
+        Using cursor mechanism (https://support.datacite.org/docs/pagination#section-cursor).
 
         $ curl -sL https://is.gd/cLbE5h | jq -r .links.next
 
-        https://api.datacite.org/dois?page%5Bcursor%5D=MTMxNjgwODE3NTAwMCwxMC41NDM5LzEwMjUxOTI&page%5Bsize%5D=50&query=updated%3A%5B2019-11-18T00%3A00%3A00.000Z+TO+2019-11-18T23%3A59%3A59.000Z%5D
+        Example: https://is.gd/cLbE5h
 
-        Notes.
-
-        (1) HTTP 400 issues.
-
-        Funny "search_after has 3 value(s) but sort has 2." on
-        https://api.datacite.org/dois?page%5Bsize%5D=50&page%5Bcursor%5D=MTQyMzQ2ODQwMTAwMCwxMC41Njc1L0hZV0FfMjAxNSwxXzI&query=updated%3A%5B2019-11-20T00%3A00%3A00.000Z+TO+2019-11-20T23%3A59%3A59.000Z%5D
-
-        Reported as https://github.com/datacite/datacite/issues/897.
+        Further API errors reported:
+        https://github.com/datacite/datacite/issues/897 (HTTP 400)
+        https://github.com/datacite/datacite/issues/898 (HTTP 500)
         """
         parsed = urlparse(resp['links']['next'])
         page_cursor = parse_qs(parsed.query).get('page[cursor]')
```