From f58d4c2605bb028fd8844b25d345b524a5d47a87 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 23 Jul 2021 17:47:40 -0700 Subject: refs: small refactors/tweaks --- python/fatcat_tools/references.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'python/fatcat_tools/references.py') diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py index 67a16602..73f57e18 100644 --- a/python/fatcat_tools/references.py +++ b/python/fatcat_tools/references.py @@ -46,7 +46,7 @@ class BiblioRef(BaseModel): target_release_ident: Optional[str] target_work_ident: Optional[str] target_openlibrary_work: Optional[str] - target_url_surt: Optional[str] + # TODO: target_url_surt: Optional[str] # would not be stored in elasticsearch, but would be auto-generated by all "get" methods from the SURT, so calling code does not need to do SURT transform target_url: Optional[str] @@ -69,10 +69,22 @@ class BiblioRef(BaseModel): """ if self.target_openlibrary_work and self.target_openlibrary_work.startswith("/works/"): self.target_openlibrary_work = self.target_openlibrary_work[7:] - if self.target_url_surt and not self.target_url: - # TODO: convert SURT to regular URL - pass - # TODO: if target_openlibrary_work, add an access option? + + # work-arounds for bad/weird ref_key + if self.ref_key: + self.ref_key = self.ref_key.strip() + if self.ref_key[0] in ['/', '_']: + self.ref_key = self.ref_key[1:] + if self.ref_key.startswith("10.") and 'SICI' in self.ref_key and '-' in self.ref_key: + self.ref_key = self.ref_key.split('-')[-1] + if self.ref_key.startswith("10.") and '_' in self.ref_key: + self.ref_key = self.ref_key.split('_')[-1] + if len(self.ref_key) > 10 and "#" in self.ref_key: + self.ref_key = self.ref_key.split('#')[-1] + if len(self.ref_key) > 10 and "_" in self.ref_key: + self.ref_key = self.ref_key.split('_')[-1] + if not self.ref_key and self.ref_index is not None: + self.ref_key = str(self.ref_index) return self @@ -181,7 +193,6 @@ def get_inbound_refs( release_ident: Optional[str] = None, work_ident: Optional[str] = None, openlibrary_work: Optional[str] = None, - url_surt: Optional[str] = None, url: Optional[str] = None, consolidate_works: bool = True, filter_stage: List[str] = [], @@ -207,8 +218,6 @@ def get_inbound_refs( search = search.filter("term", target_work_ident=work_ident) elif openlibrary_work: search = search.filter("term", target_openlibrary_work=openlibrary_work) - elif url_surt: - search = search.filter("term", target_url_surt=url_surt) else: raise ValueError("require a lookup key") @@ -230,7 +239,6 @@ def count_inbound_refs( release_ident: Optional[str] = None, work_ident: Optional[str] = None, openlibrary_work: Optional[str] = None, - url_surt: Optional[str] = None, url: Optional[str] = None, filter_stage: List[str] = [], es_index: str = "fatcat_ref", @@ -247,8 +255,6 @@ def count_inbound_refs( search = search.filter("term", target_work_ident=work_ident) elif openlibrary_work: search = search.filter("term", target_openlibrary_work=openlibrary_work) - elif url_surt: - search = search.filter("term", target_url_surt=url_surt) else: raise ValueError("require a lookup key") -- cgit v1.2.3