diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-06-01 01:05:23 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-06-02 00:38:51 -0700 |
commit | 4a40c62f6616825342bb23d03b9c4b9eebfe809c (patch) | |
tree | f999a05e76a4e2c3965dca2a12a07f654810f869 /fatcat_scholar/sandcrawler.py | |
parent | 01a1978d5b9667df4ae71a7934512e6c4e3bf9a8 (diff) | |
download | fatcat-scholar-4a40c62f6616825342bb23d03b9c4b9eebfe809c.tar.gz fatcat-scholar-4a40c62f6616825342bb23d03b9c4b9eebfe809c.zip |
add 'crossref' hydration to work pipeline
The immediate motivation is to include recent crossref refs in citation
graph transforms.
May also be valuable for researchers to have authoritative/publisher
metadata in the bundle dumps.
Diffstat (limited to 'fatcat_scholar/sandcrawler.py')
-rw-r--r-- | fatcat_scholar/sandcrawler.py | 11 |
1 files changed, 11 insertions, 0 deletions
```diff
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index 356b373..9b033b8 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -38,6 +38,17 @@ class SandcrawlerPostgrestClient:
         else:
             return None
 
+    def get_crossref(self, doi: str) -> Optional[Dict[str, Any]]:
+        resp = requests.get(
+            self.api_url + "/crossref", params=dict(doi="eq." + doi)
+        )
+        resp.raise_for_status()
+        resp_json = resp.json()
+        if resp_json:
+            return resp_json[0]
+        else:
+            return None
+
 
 class SandcrawlerMinioClient:
     def __init__(
```