From e98f389a53d886b4fa8f0237b90b086999770f78 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 29 Jan 2020 23:26:58 -0800 Subject: elastic schema fixes --- extra/elasticsearch/file_schema.json | 12 ++++++------ extra/elasticsearch/release_schema.json | 2 +- python/fatcat_tools/transforms/elasticsearch.py | 5 +++++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/extra/elasticsearch/file_schema.json b/extra/elasticsearch/file_schema.json index 66d81e0b..2a7e5be0 100644 --- a/extra/elasticsearch/file_schema.json +++ b/extra/elasticsearch/file_schema.json @@ -13,7 +13,7 @@ } }, "mappings": { - "changelog": { + "file": { "properties": { "ident": { "type": "keyword", "doc_values": false }, "state": { "type": "keyword" }, @@ -33,13 +33,13 @@ "in_ia": { "type": "boolean" }, "release_id": { "type": "alias", "path": "release_ids" }, - "sha1hex": { "type": "alias", "path": "sha1hex" }, - "sha256hex": { "type": "alias", "path": "sha256hex" }, - "md5hex": { "type": "alias", "path": "md5hex" }, + "sha1hex": { "type": "alias", "path": "sha1" }, + "sha256hex": { "type": "alias", "path": "sha256" }, + "md5hex": { "type": "alias", "path": "md5" }, "size": { "type": "alias", "path": "size_bytes" }, "domain": { "type": "alias", "path": "domains" }, - "host": { "type": "alias", "path": "host" }, - "rel": { "type": "alias", "path": "rel" } + "host": { "type": "alias", "path": "hosts" }, + "rel": { "type": "alias", "path": "rels" } } } } diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index 2b67c5f5..3d301dba 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -99,7 +99,7 @@ "affilation": { "type": "alias", "path": "affiliations" }, "ror": { "type": "alias", "path": "affiliation_rors" }, - "creator_id": { "type": "alias", "path": "creator_id" }, + "creator_id": { "type": "alias", "path": "creator_ids" }, "ref_release_id": { "type": "alias", "path": "ref_release_ids" }, 
"author": { "type": "alias", "path": "contrib_names" }, "journal": { "type": "alias", "path": "container_name" }, diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index f0146d01..42669bbf 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -416,6 +416,11 @@ def _type_of_edit(edit): def changelog_to_elasticsearch(entity): + """ + Note that this importer requires expanded fill info to work. Calling code + may need to re-fetch editgroup from API to get the 'editor' field. Some of + the old kafka feed content, in particular, doesn't include the editor. + """ editgroup = entity.editgroup t = dict( -- cgit v1.2.3