summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 14:35:56 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 14:35:56 -0700
commit: 9aeb154a32849649ac3722c52106d9abd8ec5c07 (patch)
tree: 94a314fc8a313665bfa84ea090317120f2023818
parent: 66fd8549b63e718b8cd2ed0252e96166905b999f (diff)
download: fatcat-9aeb154a32849649ac3722c52106d9abd8ec5c07.tar.gz
fatcat-9aeb154a32849649ac3722c52106d9abd8ec5c07.zip
count linked refs (not just raw refs) in elasticsearch
-rw-r--r-- extra/elasticsearch/release_schema.json 1
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py 3
-rw-r--r-- python/tests/transform_tests.py 6
3 files changed, 10 insertions, 0 deletions
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index 63aab55c..7d1e1e53 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -59,6 +59,7 @@
"contrib_names": { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch" },
"creator_ids": { "type": "keyword" },
"ref_count": { "type": "integer" },
+ "ref_linked_count": { "type": "integer" },
"file_count": { "type": "integer" },
"fileset_count": { "type": "integer" },
"webcapture_count": { "type": "integer" },
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 7f7f2f1b..c150572e 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -84,6 +84,9 @@ def release_to_elasticsearch(entity, force_bool=True):
t['any_abstract'] = len(release.abstracts or []) > 0
t['ref_count'] = len(release.refs or [])
+ t['ref_linked_count'] = 0
+ if release.refs:
+ t['ref_linked_count'] = len([1 for ref in release.refs if ref.target_release_id])
t['contrib_count'] = len(release.contribs or [])
contrib_names = []
creator_ids = []
diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py
index 27f54d92..c36137ba 100644
--- a/python/tests/transform_tests.py
+++ b/python/tests/transform_tests.py
@@ -22,6 +22,10 @@ def test_rich_elasticsearch_convert(crossref_importer):
release_year=1234,
license_slug="CC-BY-NC",
ext_ids=ReleaseExtIds(),
+ refs=[
+ ReleaseRef(),
+ ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"),
+ ],
)
r.state = 'active'
r.container = ContainerEntity(
@@ -65,6 +69,8 @@ def test_rich_elasticsearch_convert(crossref_importer):
assert es['in_dweb'] == True
assert es['is_oa'] == True
assert es['is_longtail_oa'] == False
+ assert es['ref_count'] == 2
+ assert es['ref_linked_count'] == 1
def test_elasticsearch_from_json():
r = entity_from_json(open('./tests/files/math_universe.json', 'r').read(), ReleaseEntity)