aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- python/fatcat_tools/references.py 28
1 files changed, 17 insertions, 11 deletions
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
index 67a16602..73f57e18 100644
--- a/python/fatcat_tools/references.py
+++ b/python/fatcat_tools/references.py
@@ -46,7 +46,7 @@ class BiblioRef(BaseModel):
target_release_ident: Optional[str]
target_work_ident: Optional[str]
target_openlibrary_work: Optional[str]
- target_url_surt: Optional[str]
+ # TODO: target_url_surt: Optional[str]
# would not be stored in elasticsearch, but would be auto-generated by all "get" methods from the SURT, so calling code does not need to do SURT transform
target_url: Optional[str]
@@ -69,10 +69,22 @@ class BiblioRef(BaseModel):
"""
if self.target_openlibrary_work and self.target_openlibrary_work.startswith("/works/"):
self.target_openlibrary_work = self.target_openlibrary_work[7:]
- if self.target_url_surt and not self.target_url:
- # TODO: convert SURT to regular URL
- pass
- # TODO: if target_openlibrary_work, add an access option?
+
+ # work-arounds for bad/weird ref_key
+ if self.ref_key:
+ self.ref_key = self.ref_key.strip()
+ if self.ref_key[0] in ['/', '_']:
+ self.ref_key = self.ref_key[1:]
+ if self.ref_key.startswith("10.") and 'SICI' in self.ref_key and '-' in self.ref_key:
+ self.ref_key = self.ref_key.split('-')[-1]
+ if self.ref_key.startswith("10.") and '_' in self.ref_key:
+ self.ref_key = self.ref_key.split('_')[-1]
+ if len(self.ref_key) > 10 and "#" in self.ref_key:
+ self.ref_key = self.ref_key.split('#')[-1]
+ if len(self.ref_key) > 10 and "_" in self.ref_key:
+ self.ref_key = self.ref_key.split('_')[-1]
+ if not self.ref_key and self.ref_index is not None:
+ self.ref_key = str(self.ref_index)
return self
@@ -181,7 +193,6 @@ def get_inbound_refs(
release_ident: Optional[str] = None,
work_ident: Optional[str] = None,
openlibrary_work: Optional[str] = None,
- url_surt: Optional[str] = None,
url: Optional[str] = None,
consolidate_works: bool = True,
filter_stage: List[str] = [],
@@ -207,8 +218,6 @@ def get_inbound_refs(
search = search.filter("term", target_work_ident=work_ident)
elif openlibrary_work:
search = search.filter("term", target_openlibrary_work=openlibrary_work)
- elif url_surt:
- search = search.filter("term", target_url_surt=url_surt)
else:
raise ValueError("require a lookup key")
@@ -230,7 +239,6 @@ def count_inbound_refs(
release_ident: Optional[str] = None,
work_ident: Optional[str] = None,
openlibrary_work: Optional[str] = None,
- url_surt: Optional[str] = None,
url: Optional[str] = None,
filter_stage: List[str] = [],
es_index: str = "fatcat_ref",
@@ -247,8 +255,6 @@ def count_inbound_refs(
search = search.filter("term", target_work_ident=work_ident)
elif openlibrary_work:
search = search.filter("term", target_openlibrary_work=openlibrary_work)
- elif url_surt:
- search = search.filter("term", target_url_surt=url_surt)
else:
raise ValueError("require a lookup key")