diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-10-16 13:51:49 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-16 13:51:49 -0700 |
commit | 7497d1baf0c3a9c24f5b9ce05c9567e555e4e6c9 (patch) | |
tree | cfed323379987bbb5e63959c0d8e58bd9f04caa5 /fatcat_scholar | |
parent | 7d80405505f74f3e7108567ab5cc5782278eda01 (diff) | |
download | fatcat-scholar-7497d1baf0c3a9c24f5b9ce05c9567e555e4e6c9.tar.gz fatcat-scholar-7497d1baf0c3a9c24f5b9ce05c9567e555e4e6c9.zip |
transform: refactor tag generation out of transform heavy method
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/transform.py | 65 |
1 files changed, 37 insertions, 28 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 04922ba..c08be7b 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -290,6 +290,42 @@ def biblio_metadata_hacks(biblio: ScholarBiblio) -> ScholarBiblio: # noqa: C901 return biblio +def generate_tags( + biblio: ScholarBiblio, primary_release: Optional[ReleaseEntity] +) -> List[str]: + + tags = [] + + # tags + if biblio.license_slug and biblio.license_slug.lower().startswith("cc-"): + tags.append("oa") + if primary_release and primary_release.container: + container = primary_release.container + if container.extra: + if container.extra.get("doaj"): + tags.append("doaj") + tags.append("oa") + if container.extra.get("road"): + tags.append("road") + tags.append("oa") + if container.extra.get("szczepanski"): + tags.append("szczepanski") + tags.append("oa") + if container.extra.get("ia", {}).get("longtail_oa"): + tags.append("longtail") + tags.append("oa") + if container.extra.get("sherpa_romeo", {}).get("color") == "white": + tags.append("oa") + if container.extra.get("default_license", "").lower().startswith("cc-"): + tags.append("oa") + if container.extra.get("platform"): + # scielo, ojs, wordpress, etc + tags.append(container.extra["platform"].lower()) + if biblio.doi_prefix == "10.2307": + tags.append("jstor") + return list(set(tags)) + + def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]: tags: List[str] = [] @@ -386,34 +422,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]: # TODO: additional abstracts - # tags - if biblio.license_slug and biblio.license_slug.lower().startswith("cc-"): - tags.append("oa") - if primary_release and primary_release.container: - container = primary_release.container - if container.extra: - if container.extra.get("doaj"): - tags.append("doaj") - tags.append("oa") - if container.extra.get("road"): - tags.append("road") - tags.append("oa") - if container.extra.get("szczepanski"): - tags.append("szczepanski") - tags.append("oa") - if container.extra.get("ia", {}).get("longtail_oa"): - tags.append("longtail") - tags.append("oa") - if container.extra.get("sherpa_romeo", {}).get("color") == "white": - tags.append("oa") - if container.extra.get("default_license", "").lower().startswith("cc-"): - tags.append("oa") - if container.extra.get("platform"): - # scielo, ojs, wordpress, etc - tags.append(container.extra["platform"].lower()) - if biblio.doi_prefix == "10.2307": - tags.append("jstor") - tags = list(set(tags)) + tags = generate_tags(biblio, primary_release) # biorxiv/medrxiv hacks if ( |