diff options
Diffstat (limited to 'skate/map.go')
-rw-r--r-- | skate/map.go | 32 |
1 files changed, 31 insertions, 1 deletions
diff --git a/skate/map.go b/skate/map.go index 27bc627..737c9af 100644 --- a/skate/map.go +++ b/skate/map.go @@ -19,11 +19,16 @@ var ( ErrMissingFieldName = errors.New("missing field name") ) -// Title is a document with a title. +// TitleDoc is a document with a title. type TitleDoc struct { Title string `json:"title"` } +// ContainerTitleDoc is a document with a container title. +type ContainerTitleDoc struct { + ContainerTitle string `json:"container_title"` +} + // PartialDoc for docs, that do not have DOI or title. E.g. we found 49701699 // (NCVY), 36401044 (NCVYU), 29668363 (NCUY), and so on. Some examples: XXX type PartialDoc struct { @@ -190,6 +195,31 @@ func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) { return fields, nil } +// MapperContainerTitle extracts (container_title, doc). +func MapperContainerTitle(p []byte) ([][]byte, error) { + var ( + doc ContainerTitleDoc + key []byte + ) + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } else { + key = []byte(wsReplacer.Replace(strings.TrimSpace(doc.ContainerTitle))) + } + return [][]byte{key, p}, nil +} + +// MapperContainerTitleSandcrawler extracts (container_title passed through sandcrawlerSlugify, doc). +func MapperContainerTitleSandcrawler(p []byte) (fields [][]byte, err error) { + if fields, err = MapperContainerTitle(p); err != nil { + return nil, err + } + key := string(fields[0]) + key = sandcrawlerSlugify(wsReplacer.Replace(strings.TrimSpace(key))) + fields[0] = []byte(key) + return fields, nil +} + // MapperURLFromRef extracts the (work ident, release ident, url, doc). // Previously: parallel -j 16 --block 100M --pipe "jq -rc '[.work_ident, // .release_ident, .biblio.url?] | @tsv'" ... |