diff options
-rw-r--r-- | fatcat_scholar/identifiers.py | 2 | ||||
-rw-r--r-- | fatcat_scholar/transform.py | 14 | ||||
-rw-r--r-- | tests/test_refs_transform.py | 18 |
3 files changed, 24 insertions, 10 deletions
diff --git a/fatcat_scholar/identifiers.py b/fatcat_scholar/identifiers.py index 7572e20..9a64de8 100644 --- a/fatcat_scholar/identifiers.py +++ b/fatcat_scholar/identifiers.py @@ -27,7 +27,7 @@ def clean_doi(raw: Optional[str]) -> Optional[str]: if not "10." in raw: return None if not raw.startswith("10."): - raw = raw[raw.find("10."):] + raw = raw[raw.find("10.") :] if raw[7:9] == "//": raw = raw[:8] + raw[9:] diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 641eef8..13bedb9 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -483,7 +483,10 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]: raise NotImplementedError(f"doc_type: {heavy.doc_type}") # TODO: this crude filter should not be necessary once we upgrade to GROBID v0.6+ - if heavy.grobid_fulltext and heavy.grobid_fulltext.get('file_ident') != 'gbbvrg2tpzan5hl3qcsfzh4vfq': + if ( + heavy.grobid_fulltext + and heavy.grobid_fulltext.get("file_ident") != "gbbvrg2tpzan5hl3qcsfzh4vfq" + ): fulltext_release = [ r for r in heavy.releases @@ -825,9 +828,9 @@ def refs_from_crossref( work_ident=release.work_id, release_stage=release.release_stage, release_year=release.release_year, - index=i + 1, # 1-indexed + index=i + 1, # 1-indexed key=clean_ref_key(ref.get("key"), doi=record.get("DOI")), - #locator, + # locator, target_release_id=None, ref_source=ref_source, ) @@ -863,7 +866,10 @@ def refs_from_heavy(heavy: IntermediateBundle) -> Sequence[RefStructured]: fulltext_refs: List[RefStructured] = [] # TODO: this crude filter should not be necessary once we upgrade to GROBID v0.6+ - if heavy.grobid_fulltext and heavy.grobid_fulltext.get('file_ident') != 'gbbvrg2tpzan5hl3qcsfzh4vfq': + if ( + heavy.grobid_fulltext + and heavy.grobid_fulltext.get("file_ident") != "gbbvrg2tpzan5hl3qcsfzh4vfq" + ): fulltext_release = [ r for r in heavy.releases diff --git a/tests/test_refs_transform.py b/tests/test_refs_transform.py index b923492..078b73b 100644 --- a/tests/test_refs_transform.py +++ b/tests/test_refs_transform.py @@ -42,6 +42,7 @@ def test_transform_refs_grobid() -> None: == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." ) + def test_transform_refs_crossref() -> None: with open("tests/files/example_crossref_record.json", "r") as f: @@ -68,23 +69,30 @@ def test_transform_refs_crossref() -> None: assert refs[0].biblio.contrib_raw_names is not None assert refs[0].biblio.contrib_raw_names[0] == "Churg" assert refs[0].biblio.container_name == "Arch. Pathol. Lab. Med." - assert refs[0].biblio.title == "The separation of benign and malignant mesothelial proliferations" + assert ( + refs[0].biblio.title + == "The separation of benign and malignant mesothelial proliferations" + ) assert refs[0].biblio.year == 2012 assert refs[0].biblio.pages == "1217" assert refs[0].biblio.volume == "136" assert refs[0].biblio.doi == "10.5858/arpa.2012-0112-ra" assert refs[0].biblio.unstructured is None - assert refs[6].biblio.title == "Advances in Laser Remote Sensing – Selected Papers Presented at the 20th International Laser Radar Conference" + assert ( + refs[6].biblio.title + == "Advances in Laser Remote Sensing – Selected Papers Presented at the 20th International Laser Radar Conference" + ) assert refs[6].biblio.year == 2001 assert refs[7].key == "CIT0041" - assert refs[7].biblio.unstructured == "Linda Weiss,Creating Capitalism. Oxford: Blackwell, 1988. 272 pp. £29.95. ISBN 0 631 15733 6." - + assert ( + refs[7].biblio.unstructured + == "Linda Weiss,Creating Capitalism. Oxford: Blackwell, 1988. 272 pp. £29.95. ISBN 0 631 15733 6." + ) assert refs[8].key == "576_CR3" assert refs[8].biblio.unstructured is not None assert refs[8].biblio.title == "The NURBS Book, Monographs in Visual Communication" assert refs[8].biblio.year == 1997 assert refs[8].biblio.version == "2" - |