aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar/transform.py
diff options
context:
space:
mode:
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r--fatcat_scholar/transform.py65
1 files changed, 37 insertions, 28 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 04922ba..c08be7b 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -290,6 +290,42 @@ def biblio_metadata_hacks(biblio: ScholarBiblio) -> ScholarBiblio: # noqa: C901
return biblio
def generate_tags(
    biblio: ScholarBiblio, primary_release: Optional[ReleaseEntity]
) -> List[str]:
    """Derive the search tags for a work.

    Tags come from three places: the biblio license slug, the ``extra``
    metadata of the primary release's container, and the biblio DOI prefix.

    Args:
        biblio: bibliographic record; ``license_slug`` and ``doi_prefix``
            are read.
        primary_release: release whose ``container.extra`` mapping is
            inspected. May be None, or have no container or no ``extra``,
            in which case only the biblio-derived tags are produced.

    Returns:
        De-duplicated tags in sorted order, so the output is reproducible
        from run to run (a bare ``list(set(...))`` is not).
    """
    tags: List[str] = []

    if biblio.license_slug and biblio.license_slug.lower().startswith("cc-"):
        tags.append("oa")

    container = primary_release.container if primary_release else None
    extra = container.extra if container else None
    if extra:
        # Directory flags: each one yields its own tag and implies "oa".
        for flag in ("doaj", "road", "szczepanski"):
            if extra.get(flag):
                tags.extend((flag, "oa"))

        # `or {}` / `or ""` (rather than a .get() default) also covers keys
        # that are present but explicitly set to None.
        if (extra.get("ia") or {}).get("longtail_oa"):
            tags.extend(("longtail", "oa"))
        if (extra.get("sherpa_romeo") or {}).get("color") == "white":
            tags.append("oa")
        if (extra.get("default_license") or "").lower().startswith("cc-"):
            tags.append("oa")

        if extra.get("platform"):
            # scielo, ojs, wordpress, etc
            tags.append(extra["platform"].lower())

    # Depends only on the biblio record, so it applies with or without a
    # container.
    if biblio.doi_prefix == "10.2307":
        tags.append("jstor")

    return sorted(set(tags))
+
+
def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
tags: List[str] = []
@@ -386,34 +422,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
# TODO: additional abstracts
- # tags
- if biblio.license_slug and biblio.license_slug.lower().startswith("cc-"):
- tags.append("oa")
- if primary_release and primary_release.container:
- container = primary_release.container
- if container.extra:
- if container.extra.get("doaj"):
- tags.append("doaj")
- tags.append("oa")
- if container.extra.get("road"):
- tags.append("road")
- tags.append("oa")
- if container.extra.get("szczepanski"):
- tags.append("szczepanski")
- tags.append("oa")
- if container.extra.get("ia", {}).get("longtail_oa"):
- tags.append("longtail")
- tags.append("oa")
- if container.extra.get("sherpa_romeo", {}).get("color") == "white":
- tags.append("oa")
- if container.extra.get("default_license", "").lower().startswith("cc-"):
- tags.append("oa")
- if container.extra.get("platform"):
- # scielo, ojs, wordpress, etc
- tags.append(container.extra["platform"].lower())
- if biblio.doi_prefix == "10.2307":
- tags.append("jstor")
- tags = list(set(tags))
+ tags = generate_tags(biblio, primary_release)
# biorxiv/medrxiv hacks
if (