summary refs log tree commit diff stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-01-29 21:52:33 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2020-01-29 21:59:05 -0800
commit 5d458a3df7e58e6551d8ec72979e376c62fdd2f7 (patch)
tree 4ebbaa95151c94817cb13d4e7a8e770c804694c7 /python/fatcat_tools
parent e047fbe1a9c495e86a6757d44eb32c9109a1b753 (diff)
download fatcat-5d458a3df7e58e6551d8ec72979e376c62fdd2f7.tar.gz
fatcat-5d458a3df7e58e6551d8ec72979e376c62fdd2f7.zip
fix some transform bugs, add some tests
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py 14
1 files changed, 8 insertions, 6 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index b997796d..812cd1fd 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -20,6 +20,7 @@ def test_check_kbart():
assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) == False
assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) == True
+
def release_to_elasticsearch(entity, force_bool=True):
"""
Converts from an entity model/schema to elasticsearch oriented schema.
@@ -233,8 +234,8 @@ def release_to_elasticsearch(entity, force_bool=True):
if release.pages:
first = release.pages.split('-')[0]
first = first.replace('p', '')
- if release.pages.isdigit():
- t['first_page'] = release.pages
+ if first.isdigit():
+ t['first_page'] = first
# TODO: non-numerical first pages
t['ia_microfilm_url'] = None
@@ -243,12 +244,12 @@ def release_to_elasticsearch(entity, force_bool=True):
# need extra metadata in the container extra field.
# special case as a demo for now.
if release.container_id == "hl5g6d5msjcl7hlbyyvcsbhc2u" \
- and release.year in (2011, 2013) \
- and release.volume.isdigit() \
+ and release.release_year in (2011, 2013) \
+ and release.issue.isdigit() \
and t['first_page']:
t['ia_microfilm_url'] = "https://archive.org/details/sim_bjog_{}-{:02d}/page/n{}".format(
- release.year,
- release.volume - 1,
+ release.release_year,
+ int(release.issue) - 1,
t['first_page'],
)
@@ -299,6 +300,7 @@ def release_to_elasticsearch(entity, force_bool=True):
return t
+
def container_to_elasticsearch(entity, force_bool=True):
"""
Converts from an entity model/schema to elasticsearch oriented schema.