diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 21:52:33 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-01-29 21:59:05 -0800 |
commit | 5d458a3df7e58e6551d8ec72979e376c62fdd2f7 (patch) | |
tree | 4ebbaa95151c94817cb13d4e7a8e770c804694c7 /python/fatcat_tools/transforms | |
parent | e047fbe1a9c495e86a6757d44eb32c9109a1b753 (diff) | |
download | fatcat-5d458a3df7e58e6551d8ec72979e376c62fdd2f7.tar.gz fatcat-5d458a3df7e58e6551d8ec72979e376c62fdd2f7.zip |
fix some transform bugs, add some tests
Diffstat (limited to 'python/fatcat_tools/transforms')
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 14 |
1 file changed, 8 insertions, 6 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index b997796d..812cd1fd 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -20,6 +20,7 @@ def test_check_kbart(): assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) == False assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) == True + def release_to_elasticsearch(entity, force_bool=True): """ Converts from an entity model/schema to elasticsearch oriented schema. @@ -233,8 +234,8 @@ def release_to_elasticsearch(entity, force_bool=True): if release.pages: first = release.pages.split('-')[0] first = first.replace('p', '') - if release.pages.isdigit(): - t['first_page'] = release.pages + if first.isdigit(): + t['first_page'] = first # TODO: non-numerical first pages t['ia_microfilm_url'] = None @@ -243,12 +244,12 @@ def release_to_elasticsearch(entity, force_bool=True): # need extra metadata in the container extra field. # special case as a demo for now. if release.container_id == "hl5g6d5msjcl7hlbyyvcsbhc2u" \ - and release.year in (2011, 2013) \ - and release.volume.isdigit() \ + and release.release_year in (2011, 2013) \ + and release.issue.isdigit() \ and t['first_page']: t['ia_microfilm_url'] = "https://archive.org/details/sim_bjog_{}-{:02d}/page/n{}".format( - release.year, - release.volume - 1, + release.release_year, + int(release.issue) - 1, t['first_page'], ) @@ -299,6 +300,7 @@ def release_to_elasticsearch(entity, force_bool=True): return t + def container_to_elasticsearch(entity, force_bool=True): """ Converts from an entity model/schema to elasticsearch oriented schema. |