From 5d458a3df7e58e6551d8ec72979e376c62fdd2f7 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 29 Jan 2020 21:52:33 -0800 Subject: fix some transform bugs, add some tests --- python/fatcat_tools/transforms/elasticsearch.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index b997796d..812cd1fd 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -20,6 +20,7 @@ def test_check_kbart(): assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) == False assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) == True + def release_to_elasticsearch(entity, force_bool=True): """ Converts from an entity model/schema to elasticsearch oriented schema. @@ -233,8 +234,8 @@ def release_to_elasticsearch(entity, force_bool=True): if release.pages: first = release.pages.split('-')[0] first = first.replace('p', '') - if release.pages.isdigit(): - t['first_page'] = release.pages + if first.isdigit(): + t['first_page'] = first # TODO: non-numerical first pages t['ia_microfilm_url'] = None @@ -243,12 +244,12 @@ def release_to_elasticsearch(entity, force_bool=True): # need extra metadata in the container extra field. # special case as a demo for now. if release.container_id == "hl5g6d5msjcl7hlbyyvcsbhc2u" \ - and release.year in (2011, 2013) \ - and release.volume.isdigit() \ + and release.release_year in (2011, 2013) \ + and release.issue.isdigit() \ and t['first_page']: t['ia_microfilm_url'] = "https://archive.org/details/sim_bjog_{}-{:02d}/page/n{}".format( - release.year, - release.volume - 1, + release.release_year, + int(release.issue) - 1, t['first_page'], ) @@ -299,6 +300,7 @@ def release_to_elasticsearch(entity, force_bool=True): return t + def container_to_elasticsearch(entity, force_bool=True): """ Converts from an entity model/schema to elasticsearch oriented schema. -- cgit v1.2.3