diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-12-06 16:25:21 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-12-06 16:50:00 -0800 |
commit | f60d37fb2f9079e6707f9a253983b6ea07964e18 (patch) | |
tree | e36db11b2123a7a50d48dbf0f8449bcef9e13fcc | |
parent | c1af67fc72c671c4dc40536960ab47e78195c881 (diff) | |
download | fatcat-scholar-f60d37fb2f9079e6707f9a253983b6ea07964e18.tar.gz fatcat-scholar-f60d37fb2f9079e6707f9a253983b6ea07964e18.zip |
fetch GROBID-parsed refs along with crossref metadata
-rw-r--r-- | fatcat_scholar/sandcrawler.py | 6 | ||||
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 3 | ||||
-rw-r--r-- | tests/test_work_pipeline.py | 3 |
3 files changed, 8 insertions, 4 deletions
```diff
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index 087cdc6..5580841 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -39,8 +39,10 @@ class SandcrawlerPostgrestClient:
         else:
             return None
 
-    def get_crossref(self, doi: str) -> Optional[Dict[str, Any]]:
-        resp = requests.get(self.api_url + "/crossref", params=dict(doi="eq." + doi))
+    def get_crossref_with_refs(self, doi: str) -> Optional[Dict[str, Any]]:
+        resp = requests.get(
+            self.api_url + "/crossref_with_refs", params=dict(doi="eq." + doi)
+        )
         resp.raise_for_status()
         resp_json = resp.json()
         if resp_json:
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 9dce006..fa1a7bc 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -276,13 +276,14 @@ class WorkPipeline:
             # if this is definitely a Datacite or JALC DOI, can skip the Crossref cache lookup
             return None
         doi = re.ext_ids.doi.lower()
-        crossref_meta = self.sandcrawler_db_client.get_crossref(doi)
+        crossref_meta = self.sandcrawler_db_client.get_crossref_with_refs(doi)
         if not crossref_meta or not crossref_meta.get("record"):
             return None
         return dict(
             release_ident=re.ident,
             doi=doi,
             record=crossref_meta["record"],
+            grobid_refs=crossref_meta["refs_json"] or [],
         )
 
     def lookup_sim(self, release: ReleaseEntity) -> Optional[SimIssueRow]:
diff --git a/tests/test_work_pipeline.py b/tests/test_work_pipeline.py
index a28f728..bc8a79a 100644
--- a/tests/test_work_pipeline.py
+++ b/tests/test_work_pipeline.py
@@ -73,7 +73,7 @@ def test_run_transform(mocker: Any) -> None:
 
     responses.add(
         responses.GET,
-        "http://disabled-during-tests-bogus.xyz:3333/crossref?doi=eq.10.7717%2Fpeerj.4375",
+        "http://disabled-during-tests-bogus.xyz:3333/crossref_with_refs?doi=eq.10.7717%2Fpeerj.4375",
         status=200,
         json=[
             {
@@ -83,6 +83,7 @@ def test_run_transform(mocker: Any) -> None:
                     "title": "something",
                     "TODO_better_object": 3,
                 },
+                "refs_json": [],
             }
         ],
     )
```