aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-01-29 23:26:58 -0800
committerBryan Newbold <bnewbold@robocracy.org>2020-01-29 23:26:58 -0800
commite98f389a53d886b4fa8f0237b90b086999770f78 (patch)
tree21019802437b801cfdd9754f6a1bdc6b565779ad
parentbf718fd076476c1a54e80ca88cd02ede606ab6f3 (diff)
downloadfatcat-e98f389a53d886b4fa8f0237b90b086999770f78.tar.gz
fatcat-e98f389a53d886b4fa8f0237b90b086999770f78.zip
elastic schema fixes
-rw-r--r--extra/elasticsearch/file_schema.json12
-rw-r--r--extra/elasticsearch/release_schema.json2
-rw-r--r--python/fatcat_tools/transforms/elasticsearch.py5
3 files changed, 12 insertions, 7 deletions
diff --git a/extra/elasticsearch/file_schema.json b/extra/elasticsearch/file_schema.json
index 66d81e0b..2a7e5be0 100644
--- a/extra/elasticsearch/file_schema.json
+++ b/extra/elasticsearch/file_schema.json
@@ -13,7 +13,7 @@
}
},
"mappings": {
- "changelog": {
+ "file": {
"properties": {
"ident": { "type": "keyword", "doc_values": false },
"state": { "type": "keyword" },
@@ -33,13 +33,13 @@
"in_ia": { "type": "boolean" },
"release_id": { "type": "alias", "path": "release_ids" },
- "sha1hex": { "type": "alias", "path": "sha1hex" },
- "sha256hex": { "type": "alias", "path": "sha256hex" },
- "md5hex": { "type": "alias", "path": "md5hex" },
+ "sha1hex": { "type": "alias", "path": "sha1" },
+ "sha256hex": { "type": "alias", "path": "sha256" },
+ "md5hex": { "type": "alias", "path": "md5" },
"size": { "type": "alias", "path": "size_bytes" },
"domain": { "type": "alias", "path": "domains" },
- "host": { "type": "alias", "path": "host" },
- "rel": { "type": "alias", "path": "rel" }
+ "host": { "type": "alias", "path": "hosts" },
+ "rel": { "type": "alias", "path": "rels" }
}
}
}
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index 2b67c5f5..3d301dba 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -99,7 +99,7 @@
"affilation": { "type": "alias", "path": "affiliations" },
"ror": { "type": "alias", "path": "affiliation_rors" },
- "creator_id": { "type": "alias", "path": "creator_id" },
+ "creator_id": { "type": "alias", "path": "creator_ids" },
"ref_release_id": { "type": "alias", "path": "ref_release_ids" },
"author": { "type": "alias", "path": "contrib_names" },
"journal": { "type": "alias", "path": "container_name" },
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index f0146d01..42669bbf 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -416,6 +416,11 @@ def _type_of_edit(edit):
def changelog_to_elasticsearch(entity):
+ """
+ Note that this importer requires expanded fill info to work. Calling code
+ may need to re-fetch editgroup from API to get the 'editor' field. Some of
+ the old kafka feed content doesn't include the editor field, in particular.
+ """
editgroup = entity.editgroup
t = dict(