3 files changed, 32 insertions, 19 deletions
diff --git a/fatcat_scholar/es_transform.py b/fatcat_scholar/es_transform.py
index 1f47e2c..089b155 100644
--- a/fatcat_scholar/es_transform.py
+++ b/fatcat_scholar/es_transform.py
@@ -65,6 +65,7 @@ class ScholarBiblio(BaseModel):
     container_original_name: Optional[str]
     container_ident: Optional[str]
     container_issnl: Optional[str]
+    container_wikidata_qid: Optional[str]
     issns: List[str]
     container_type: Optional[str]
     contrib_count: Optional[int]
@@ -112,10 +113,10 @@ class ScholarRelease(BaseModel):
     container_type: Optional[str]
 
 class ScholarSim(BaseModel):
-    ia_item: str
-    ia_collection: str
+    issue_item: str
+    pub_collection: str
+    sim_pubid: str
     first_page: Optional[str]
-    pub_id: str
 
 class ScholarAbstract(BaseModel):
     body: str
diff --git a/proposals/work_schema.md b/proposals/work_schema.md
index 1e0f272..933e750 100644
--- a/proposals/work_schema.md
+++ b/proposals/work_schema.md
@@ -1,19 +1,21 @@
 
 ## Top-Level
 
-- type: _doc
-- key: keyword
-- key_type: keyword (work or page)
-- `work_id`
-- biblio: obj
-- fulltext: obj
-- sim: obj
-- abstracts: nested
+- type: `_doc` (i.e., no mapping type; `include_type_name=false`)
+- key: keyword (same as `_id`)
+- `doc_type`: keyword (work or page)
+- `doc_index_ts`: timestamp when document indexed
+- `work_id`: fatcat work ident (optional)
+
+- `biblio`: obj
+- `fulltext`: obj
+- `ia_sim`: obj
+- `abstracts`: nested
     body
     lang
-- releases: nested (TBD)
-- access
-- tags: array of keywords
+- `releases`: nested (TBD)
+- `access`
+- `tags`: array of keywords
 
 TODO:
 - summary fields to index "everything" into?
@@ -50,11 +52,14 @@ NEW:
 - `container_name` (etc)
 - `container_id`
 - `container_issnl`
-- `container_issn` (array)
+- `container_wikidata_qid`
+- `issns` (array)
 - `contrib_names`
 - `affiliations`
 - `creator_ids`
 
+TODO: should all external identifiers go under `releases` instead of `biblio`? Or should some be duplicated in both?
+
 ## Fulltext
 
 - `status`: web, sim, shadow
@@ -81,6 +86,12 @@ Only index one abstract per language.
 Enough details to construct a link or do a lookup or whatever. Note that might
 be doing CDL status lookups on SERP pages.
 
+- `issue_item`: str
+- `pub_collection`: str
+- `sim_pubid`: str
+- `first_page`: str (optional)
+
+
 Also pass-through archive.org metadata here (collection-level and item-level)
 
 ## Access
diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json
index 613ca1e..e09b00c 100644
--- a/schema/scholar_fulltext.v01.json
+++ b/schema/scholar_fulltext.v01.json
@@ -98,6 +98,7 @@
             "container_original_name":       { "type": "text", "index": true, "analyzer": "textIcu", "search_analyzer":"textIcuSearch", "copy_to": ["biblio_all", "everything"] },
             "container_ident":      { "type": "keyword", "normalizer": "default" },
             "container_issnl":      { "type": "keyword", "normalizer": "default" },
+            "container_wikidata_qid": { "type": "keyword", "normalizer": "default" },
             "issns":                { "type": "keyword", "normalizer": "default" },
             "container_type":       { "type": "keyword", "normalizer": "default" },
             "contrib_count":        { "type": "integer" },
@@ -128,10 +129,10 @@
           "type": "object",
           "dynamic": false,
           "properties": {
-            "ia_item":          { "type": "keyword", "normalizer": "default" },
-            "ia_collection":    { "type": "keyword", "normalizer": "default" },
-            "first_page":       { "type": "keyword", "normalizer": "default" },
-            "pub_id":           { "type": "keyword", "normalizer": "default" }
+            "issue_item":       { "type": "keyword", "normalizer": "default" },
+            "pub_collection":   { "type": "keyword", "normalizer": "default" },
+            "sim_pubid":        { "type": "keyword", "normalizer": "default" },
+            "first_page":       { "type": "keyword", "normalizer": "default" }
           }
         },