From 703ce536e20c1c25f38cab575fe645b56fb692fa Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sat, 16 Jan 2021 16:21:21 -0800 Subject: small corrections to schema/transform --- fatcat_scholar/schema.py | 5 ++++- fatcat_scholar/transform.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fatcat_scholar') diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py index b5a4749..2f0f04e 100644 --- a/fatcat_scholar/schema.py +++ b/fatcat_scholar/schema.py @@ -480,7 +480,8 @@ def es_abstracts_from_release(release: ReleaseEntity) -> List[ScholarAbstract]: def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: if release.container: - publisher = release.publisher + publisher = release.container.publisher or release.publisher + publisher_type = release.container.extra.get("publisher_type", None) container_name = release.container.name container_original_name = ( release.container.extra and release.container.extra.get("original_name") @@ -500,6 +501,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: issns = list(set(issns)) else: publisher = release.extra and release.extra.get("publisher") + publisher_type = None container_name = release.extra and release.extra.get("container_name") container_original_name = None container_ident = None @@ -545,6 +547,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: oai_id=release.ext_ids.oai, license_slug=release.license_slug, publisher=publisher, + publisher_type=publisher_type, container_name=clean_str(container_name), container_original_name=container_original_name, container_ident=container_ident, diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index b65197f..417fe38 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -277,7 +277,9 @@ def biblio_metadata_hacks(biblio: ScholarBiblio) -> ScholarBiblio: # noqa: C901 """ # valid year - if biblio.release_year and biblio.release_year > 2025: + if biblio.release_year and ( + biblio.release_year > 2025 or biblio.release_year < 1500 + ): biblio.release_year = None biblio.release_date = None @@ -386,7 +388,7 @@ def generate_tags( if container.extra.get("platform"): # scielo, ojs, wordpress, etc tags.append(container.extra["platform"].lower()) - if biblio.doi_prefix == "10.2307": + if biblio.doi_prefix == "10.2307" or biblio.jstor_id: tags.append("jstor") return list(set(tags)) -- cgit v1.2.3