aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar/sandcrawler.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-06-01 01:05:23 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-06-02 00:38:51 -0700
commit 4a40c62f6616825342bb23d03b9c4b9eebfe809c (patch)
tree f999a05e76a4e2c3965dca2a12a07f654810f869 /fatcat_scholar/sandcrawler.py
parent 01a1978d5b9667df4ae71a7934512e6c4e3bf9a8 (diff)
download fatcat-scholar-4a40c62f6616825342bb23d03b9c4b9eebfe809c.tar.gz
fatcat-scholar-4a40c62f6616825342bb23d03b9c4b9eebfe809c.zip
add 'crossref' hydration to work pipeline
The immediate motivation is to include recent crossref refs in citation graph transforms. May also be valuable for researchers to have authoritative/publisher metadata in the bundle dumps.
Diffstat (limited to 'fatcat_scholar/sandcrawler.py')
-rw-r--r-- fatcat_scholar/sandcrawler.py 11
1 files changed, 11 insertions, 0 deletions
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index 356b373..9b033b8 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -38,6 +38,17 @@ class SandcrawlerPostgrestClient:
else:
return None
+ def get_crossref(self, doi: str) -> Optional[Dict[str, Any]]:
+ resp = requests.get(
+ self.api_url + "/crossref", params=dict(doi="eq." + doi)
+ )
+ resp.raise_for_status()
+ resp_json = resp.json()
+ if resp_json:
+ return resp_json[0]
+ else:
+ return None
+
class SandcrawlerMinioClient:
def __init__(