diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-07-23 17:47:40 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-07-23 17:47:40 -0700 |
commit | f58d4c2605bb028fd8844b25d345b524a5d47a87 (patch) | |
tree | 1bff9d4f5e1f40b4107d328e9dc1bd55f6111a23 /python/fatcat_tools | |
parent | d01ab339a0aa568b6ccd6c56beb611a2e7ac9686 (diff) | |
download | fatcat-f58d4c2605bb028fd8844b25d345b524a5d47a87.tar.gz fatcat-f58d4c2605bb028fd8844b25d345b524a5d47a87.zip |
refs: small refactors/tweaks
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/references.py | 28 |
1 files changed, 17 insertions, 11 deletions
```diff
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
index 67a16602..73f57e18 100644
--- a/python/fatcat_tools/references.py
+++ b/python/fatcat_tools/references.py
@@ -46,7 +46,7 @@ class BiblioRef(BaseModel):
     target_release_ident: Optional[str]
     target_work_ident: Optional[str]
     target_openlibrary_work: Optional[str]
-    target_url_surt: Optional[str]
+    # TODO: target_url_surt: Optional[str]
     # would not be stored in elasticsearch, but would be auto-generated by all "get" methods from the SURT, so calling code does not need to do SURT transform
     target_url: Optional[str]
 
@@ -69,10 +69,22 @@ class BiblioRef(BaseModel):
         """
         if self.target_openlibrary_work and self.target_openlibrary_work.startswith("/works/"):
             self.target_openlibrary_work = self.target_openlibrary_work[7:]
-        if self.target_url_surt and not self.target_url:
-            # TODO: convert SURT to regular URL
-            pass
-        # TODO: if target_openlibrary_work, add an access option?
+
+        # work-arounds for bad/weird ref_key
+        if self.ref_key:
+            self.ref_key = self.ref_key.strip()
+            if self.ref_key[0] in ['/', '_']:
+                self.ref_key = self.ref_key[1:]
+            if self.ref_key.startswith("10.") and 'SICI' in self.ref_key and '-' in self.ref_key:
+                self.ref_key = self.ref_key.split('-')[-1]
+            if self.ref_key.startswith("10.") and '_' in self.ref_key:
+                self.ref_key = self.ref_key.split('_')[-1]
+            if len(self.ref_key) > 10 and "#" in self.ref_key:
+                self.ref_key = self.ref_key.split('#')[-1]
+            if len(self.ref_key) > 10 and "_" in self.ref_key:
+                self.ref_key = self.ref_key.split('_')[-1]
+        if not self.ref_key and self.ref_index is not None:
+            self.ref_key = str(self.ref_index)
         return self
 
 
@@ -181,7 +193,6 @@ def get_inbound_refs(
     release_ident: Optional[str] = None,
     work_ident: Optional[str] = None,
     openlibrary_work: Optional[str] = None,
-    url_surt: Optional[str] = None,
     url: Optional[str] = None,
     consolidate_works: bool = True,
     filter_stage: List[str] = [],
@@ -207,8 +218,6 @@ def get_inbound_refs(
         search = search.filter("term", target_work_ident=work_ident)
     elif openlibrary_work:
         search = search.filter("term", target_openlibrary_work=openlibrary_work)
-    elif url_surt:
-        search = search.filter("term", target_url_surt=url_surt)
     else:
         raise ValueError("require a lookup key")
 
@@ -230,7 +239,6 @@ def count_inbound_refs(
     release_ident: Optional[str] = None,
     work_ident: Optional[str] = None,
     openlibrary_work: Optional[str] = None,
-    url_surt: Optional[str] = None,
     url: Optional[str] = None,
     filter_stage: List[str] = [],
     es_index: str = "fatcat_ref",
@@ -247,8 +255,6 @@ def count_inbound_refs(
         search = search.filter("term", target_work_ident=work_ident)
     elif openlibrary_work:
         search = search.filter("term", target_openlibrary_work=openlibrary_work)
-    elif url_surt:
-        search = search.filter("term", target_url_surt=url_surt)
     else:
         raise ValueError("require a lookup key")
 
```