From 25a4517383c03540bd3d7695e14f60c1bb20da2d Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 1 Jun 2021 17:20:22 +0200 Subject: add mapper: MapperReleaseContainerName --- skate/cmd/skate-map/main.go | 19 ++++++++++--------- skate/map.go | 27 ++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 14 deletions(-) (limited to 'skate') diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go index 572ecec..060e324 100644 --- a/skate/cmd/skate-map/main.go +++ b/skate/cmd/skate-map/main.go @@ -67,15 +67,16 @@ func main() { availableMappers := map[string]skate.Mapper{ // Add new mapper functions here. TODO: add more docs, and improve // composability, e.g. like middleware. - "id": skate.Identity, - "ff": skate.CreateFixedMapper(*extraValue), - "ti": skate.MapperTitle, - "tn": skate.MapperTitleNormalized, - "ty": skate.MapperTitleNysiis, - "ts": skate.MapperTitleSandcrawler, - "ru": skate.MapperURLFromRef, - "cni": skate.MapperContainerName, - "cns": skate.MapperContainerNameSandcrawler, + "id": skate.Identity, + "ff": skate.CreateFixedMapper(*extraValue), + "ti": skate.MapperTitle, + "tn": skate.MapperTitleNormalized, + "ty": skate.MapperTitleNysiis, + "ts": skate.MapperTitleSandcrawler, + "ru": skate.MapperURLFromRef, + "cni": skate.MapperContainerName, + "cns": skate.MapperContainerNameSandcrawler, + "rcns": skate.MapperReleaseContainerName, } if *logFile != "" { f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644) diff --git a/skate/map.go b/skate/map.go index f812464..a06b5ae 100644 --- a/skate/map.go +++ b/skate/map.go @@ -24,16 +24,16 @@ type TitleDoc struct { Title string `json:"title"` } -// ContainerNameDoc is a document with a container title. +// ContainerNameDoc is a (ref) document with a container title. type ContainerNameDoc struct { Biblio struct { ContainerName string `json:"container_name"` } `json:"biblio"` } -// PartialDoc for ref docs, that do not have DOI or title. E.g. we found 49701699 -// (NCVY), 36401044 (NCVYU), 29668363 (NCUY), and so on. Some examples: XXX -type PartialDoc struct { +// PartialRef for ref docs, that do not have DOI or title. E.g. we found 49701699 +// (NCVY), 36401044 (NCVYU), 29668363 (NCUY), and so on. Some examples: XXX. +type PartialRef struct { ContainerName string `json:"container_name"` Contribs []struct { // XXX: Need a way to sensibly compare sets of author names. @@ -200,7 +200,7 @@ func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) { // MapperContainerName extracts (container_name, doc). func MapperContainerName(p []byte) ([][]byte, error) { var ( - doc PartialDoc + doc PartialRef key []byte ) if err := json.Unmarshal(p, &doc); err != nil { @@ -240,6 +240,23 @@ func MapperURLFromRef(p []byte) (fields [][]byte, err error) { return fields, nil } +// MapperReleaseContainerName extracts a normalized container name. +func MapperReleaseContainerName(p []byte) (fields [][]byte, err error) { + var ( + doc Release + key []byte + ) + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } + if doc.Container.Name != "" { + key = []byte(sandcrawlerSlugify(wsReplacer.Replace(strings.TrimSpace(doc.Container.Name)))) + } else if doc.ContainerName != "" { + key = []byte(sandcrawlerSlugify(wsReplacer.Replace(strings.TrimSpace(doc.ContainerName)))) + } + return [][]byte{key, p}, nil +} + // MapperPartial works on partial documents. func MapperPartial(p []byte) (fields [][]byte, err error) { return nil, nil -- cgit v1.2.3