diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 14:35:56 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 14:35:56 -0700 |
commit | 9aeb154a32849649ac3722c52106d9abd8ec5c07 (patch) | |
tree | 94a314fc8a313665bfa84ea090317120f2023818 | |
parent | 66fd8549b63e718b8cd2ed0252e96166905b999f (diff) | |
download | fatcat-9aeb154a32849649ac3722c52106d9abd8ec5c07.tar.gz fatcat-9aeb154a32849649ac3722c52106d9abd8ec5c07.zip |
Count linked refs (those with a `target_release_id`), not just raw refs, in the Elasticsearch release document
-rw-r--r-- | extra/elasticsearch/release_schema.json | 1 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 3 | ||||
-rw-r--r-- | python/tests/transform_tests.py | 6 |
3 files changed, 10 insertions, 0 deletions
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index 63aab55c..7d1e1e53 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -59,6 +59,7 @@ "contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" }, "creator_ids": { "type": "keyword" }, "ref_count": { "type": "integer" }, + "ref_linked_count": { "type": "integer" }, "file_count": { "type": "integer" }, "fileset_count": { "type": "integer" }, "webcapture_count": { "type": "integer" }, diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 7f7f2f1b..c150572e 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -84,6 +84,9 @@ def release_to_elasticsearch(entity, force_bool=True): t['any_abstract'] = len(release.abstracts or []) > 0 t['ref_count'] = len(release.refs or []) + t['ref_linked_count'] = 0 + if release.refs: + t['ref_linked_count'] = len([1 for ref in release.refs if ref.target_release_id]) t['contrib_count'] = len(release.contribs or []) contrib_names = [] creator_ids = [] diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py index 27f54d92..c36137ba 100644 --- a/python/tests/transform_tests.py +++ b/python/tests/transform_tests.py @@ -22,6 +22,10 @@ def test_rich_elasticsearch_convert(crossref_importer): release_year=1234, license_slug="CC-BY-NC", ext_ids=ReleaseExtIds(), + refs=[ + ReleaseRef(), + ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), + ], ) r.state = 'active' r.container = ContainerEntity( @@ -65,6 +69,8 @@ def test_rich_elasticsearch_convert(crossref_importer): assert es['in_dweb'] == True assert es['is_oa'] == True assert es['is_longtail_oa'] == False + assert es['ref_count'] == 2 + assert es['ref_linked_count'] == 1 def test_elasticsearch_from_json(): r = 
entity_from_json(open('./tests/files/math_universe.json', 'r').read(), ReleaseEntity) |