From 9aeb154a32849649ac3722c52106d9abd8ec5c07 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 22 May 2019 14:35:56 -0700 Subject: count linked refs (not just raw refs) in elasticsearch --- extra/elasticsearch/release_schema.json | 1 + python/fatcat_tools/transforms/elasticsearch.py | 3 +++ python/tests/transform_tests.py | 6 ++++++ 3 files changed, 10 insertions(+) diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index 63aab55c..7d1e1e53 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -59,6 +59,7 @@ "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, "creator_ids": { "type": "keyword" }, "ref_count": { "type": "integer" }, + "ref_linked_count": { "type": "integer" }, "file_count": { "type": "integer" }, "fileset_count": { "type": "integer" }, "webcapture_count": { "type": "integer" }, diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 7f7f2f1b..c150572e 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -84,6 +84,9 @@ def release_to_elasticsearch(entity, force_bool=True): t['any_abstract'] = len(release.abstracts or []) > 0 t['ref_count'] = len(release.refs or []) + t['ref_linked_count'] = 0 + if release.refs: + t['ref_linked_count'] = len([1 for ref in release.refs if ref.target_release_id]) t['contrib_count'] = len(release.contribs or []) contrib_names = [] creator_ids = [] diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py index 27f54d92..c36137ba 100644 --- a/python/tests/transform_tests.py +++ b/python/tests/transform_tests.py @@ -22,6 +22,10 @@ def test_rich_elasticsearch_convert(crossref_importer): release_year=1234, license_slug="CC-BY-NC", ext_ids=ReleaseExtIds(), + refs=[ + ReleaseRef(), + ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), + ], ) r.state = 'active' r.container = ContainerEntity( @@ -65,6 +69,8 @@ def test_rich_elasticsearch_convert(crossref_importer): assert es['in_dweb'] == True assert es['is_oa'] == True assert es['is_longtail_oa'] == False + assert es['ref_count'] == 2 + assert es['ref_linked_count'] == 1 def test_elasticsearch_from_json(): r = entity_from_json(open('./tests/files/math_universe.json', 'r').read(), ReleaseEntity) -- cgit v1.2.3