diff options
| -rw-r--r-- | extra/elasticsearch/file_schema.json | 12 | ||||
| -rw-r--r-- | extra/elasticsearch/release_schema.json | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms/elasticsearch.py | 5 | 
3 files changed, 12 insertions, 7 deletions
diff --git a/extra/elasticsearch/file_schema.json b/extra/elasticsearch/file_schema.json index 66d81e0b..2a7e5be0 100644 --- a/extra/elasticsearch/file_schema.json +++ b/extra/elasticsearch/file_schema.json @@ -13,7 +13,7 @@      }  },  "mappings": { -    "changelog": { +    "file": {          "properties": {              "ident":            { "type": "keyword", "doc_values": false },              "state":            { "type": "keyword" }, @@ -33,13 +33,13 @@              "in_ia":            { "type": "boolean" },              "release_id":       { "type": "alias", "path": "release_ids" }, -            "sha1hex":          { "type": "alias", "path": "sha1hex" }, -            "sha256hex":        { "type": "alias", "path": "sha256hex" }, -            "md5hex":           { "type": "alias", "path": "md5hex" }, +            "sha1hex":          { "type": "alias", "path": "sha1" }, +            "sha256hex":        { "type": "alias", "path": "sha256" }, +            "md5hex":           { "type": "alias", "path": "md5" },              "size":             { "type": "alias", "path": "size_bytes" },              "domain":           { "type": "alias", "path": "domains" }, -            "host":             { "type": "alias", "path": "host" }, -            "rel":              { "type": "alias", "path": "rel" } +            "host":             { "type": "alias", "path": "hosts" }, +            "rel":              { "type": "alias", "path": "rels" }          }      }  } diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index 2b67c5f5..3d301dba 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -99,7 +99,7 @@              "affilation":     { "type": "alias", "path": "affiliations" },              "ror":            { "type": "alias", "path": "affiliation_rors" }, -            "creator_id":     { "type": "alias", "path": "creator_id" }, +            "creator_id":     { "type": "alias", "path": "creator_ids" },              "ref_release_id": { "type": "alias", "path": "ref_release_ids" },              "author":         { "type": "alias", "path": "contrib_names" },              "journal":        { "type": "alias", "path": "container_name" }, diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index f0146d01..42669bbf 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -416,6 +416,11 @@ def _type_of_edit(edit):  def changelog_to_elasticsearch(entity): +    """ +    Note that this importer requires expanded fill info to work. Calling code +    may need to re-fetch editgroup from API to get the 'editor' field. Some of +    the old kafka feed content doesn't includes editor in particular. +    """      editgroup = entity.editgroup      t = dict(  | 
