diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-01-16 16:21:21 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-01-16 16:21:21 -0800 |
commit | 703ce536e20c1c25f38cab575fe645b56fb692fa (patch) | |
tree | e2ea342226b09d6820af0a05a90768e08ffcd12f | |
parent | 1ec605455589fefc6f96d0bbad625afa20769eb2 (diff) | |
download | fatcat-scholar-703ce536e20c1c25f38cab575fe645b56fb692fa.tar.gz fatcat-scholar-703ce536e20c1c25f38cab575fe645b56fb692fa.zip |
small corrections to schema/transform
-rw-r--r-- | fatcat_scholar/schema.py | 5 | ||||
-rw-r--r-- | fatcat_scholar/transform.py | 6 |
2 files changed, 8 insertions, 3 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index b5a4749..2f0f04e 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -480,7 +480,8 @@ def es_abstracts_from_release(release: ReleaseEntity) -> List[ScholarAbstract]:
def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: if release.container: - publisher = release.publisher + publisher = release.container.publisher or release.publisher + publisher_type = release.container.extra.get("publisher_type", None) container_name = release.container.name container_original_name = ( release.container.extra and release.container.extra.get("original_name")
@@ -500,6 +501,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
issns = list(set(issns)) else: publisher = release.extra and release.extra.get("publisher") + publisher_type = None container_name = release.extra and release.extra.get("container_name") container_original_name = None container_ident = None
@@ -545,6 +547,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
oai_id=release.ext_ids.oai, license_slug=release.license_slug, publisher=publisher, + publisher_type=publisher_type, container_name=clean_str(container_name), container_original_name=container_original_name, container_ident=container_ident,
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index b65197f..417fe38 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -277,7 +277,9 @@ def biblio_metadata_hacks(biblio: ScholarBiblio) -> ScholarBiblio: # noqa: C901
""" # valid year - if biblio.release_year and biblio.release_year > 2025: + if biblio.release_year and ( + biblio.release_year > 2025 or biblio.release_year < 1500 + ): biblio.release_year = None biblio.release_date = None
@@ -386,7 +388,7 @@ def generate_tags(
if container.extra.get("platform"): # scielo, ojs, wordpress, etc tags.append(container.extra["platform"].lower()) - if biblio.doi_prefix == "10.2307": + if biblio.doi_prefix == "10.2307" or biblio.jstor_id: tags.append("jstor") return list(set(tags))