summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/transform.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-10-16 13:51:49 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-10-16 13:51:49 -0700
commit 7497d1baf0c3a9c24f5b9ce05c9567e555e4e6c9 (patch)
tree cfed323379987bbb5e63959c0d8e58bd9f04caa5 /fatcat_scholar/transform.py
parent 7d80405505f74f3e7108567ab5cc5782278eda01 (diff)
download fatcat-scholar-7497d1baf0c3a9c24f5b9ce05c9567e555e4e6c9.tar.gz
fatcat-scholar-7497d1baf0c3a9c24f5b9ce05c9567e555e4e6c9.zip
transform: refactor tag generation out of transform heavy method
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- fatcat_scholar/transform.py 65
1 files changed, 37 insertions, 28 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 04922ba..c08be7b 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -290,6 +290,42 @@ def biblio_metadata_hacks(biblio: ScholarBiblio) -> ScholarBiblio: # noqa: C901
return biblio
+def generate_tags(
+    biblio: ScholarBiblio, primary_release: Optional[ReleaseEntity]
+) -> List[str]:
+    """
+    Returns the de-duplicated, sorted list of tags for a work, derived from
+    the biblio license slug and DOI prefix plus the primary release's
+    container 'extra' metadata. Null-valued 'extra' fields count as missing.
+    """
+    tags = set()
+    if biblio.license_slug and biblio.license_slug.lower().startswith("cc-"):
+        tags.add("oa")
+
+    container = primary_release.container if primary_release else None
+    extra = (container.extra if container else None) or {}
+    # a truthy flag for any of these container-level lists also implies "oa"
+    for key in ("doaj", "road", "szczepanski"):
+        if extra.get(key):
+            tags.update((key, "oa"))
+    # 'or' fallbacks: a .get() default is not used when the key holds null
+    if (extra.get("ia") or {}).get("longtail_oa"):
+        tags.update(("longtail", "oa"))
+    if (extra.get("sherpa_romeo") or {}).get("color") == "white":
+        tags.add("oa")
+    if (extra.get("default_license") or "").lower().startswith("cc-"):
+        tags.add("oa")
+    if extra.get("platform"):
+        # scielo, ojs, wordpress, etc
+        tags.add(extra["platform"].lower())
+
+    if biblio.doi_prefix == "10.2307":
+        tags.add("jstor")
+
+    # sorted so the result is stable across runs (set iteration order is not)
+    return sorted(tags)
+
+
def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
tags: List[str] = []
@@ -386,34 +422,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
# TODO: additional abstracts
- # tags
- if biblio.license_slug and biblio.license_slug.lower().startswith("cc-"):
- tags.append("oa")
- if primary_release and primary_release.container:
- container = primary_release.container
- if container.extra:
- if container.extra.get("doaj"):
- tags.append("doaj")
- tags.append("oa")
- if container.extra.get("road"):
- tags.append("road")
- tags.append("oa")
- if container.extra.get("szczepanski"):
- tags.append("szczepanski")
- tags.append("oa")
- if container.extra.get("ia", {}).get("longtail_oa"):
- tags.append("longtail")
- tags.append("oa")
- if container.extra.get("sherpa_romeo", {}).get("color") == "white":
- tags.append("oa")
- if container.extra.get("default_license", "").lower().startswith("cc-"):
- tags.append("oa")
- if container.extra.get("platform"):
- # scielo, ojs, wordpress, etc
- tags.append(container.extra["platform"].lower())
- if biblio.doi_prefix == "10.2307":
- tags.append("jstor")
- tags = list(set(tags))
+ tags = generate_tags(biblio, primary_release)
# biorxiv/medrxiv hacks
if (