diff options
Diffstat (limited to 'skate')
-rw-r--r-- | skate/cmd/skate-map/main.go | 1 | ||||
-rw-r--r-- | skate/map.go | 34 |
2 files changed, 35 insertions, 0 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go index 3d5630b..202f8bd 100644 --- a/skate/cmd/skate-map/main.go +++ b/skate/cmd/skate-map/main.go @@ -78,6 +78,7 @@ func main() { "cns": skate.MapperContainerNameSandcrawler, "rcns": skate.MapperReleaseContainerName, "vcns": skate.MapperReleaseResolvedContainerName, + "isbn": skate.MapperOpenLibraryReleaseNormalizedISBN, } if *logFile != "" { f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644) diff --git a/skate/map.go b/skate/map.go index b647480..17d9ed2 100644 --- a/skate/map.go +++ b/skate/map.go @@ -278,6 +278,40 @@ func MapperReleaseResolvedContainerName(p []byte) (fields [][]byte, err error) { return [][]byte{key, p}, nil } +func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err error) { + var ( + doc Release + key []byte + isbn13 string + ) + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } + // There can be 10 and 13 variants in the data, we always want 13. + for _, isbn := range doc.ExtIDs.ISBN { + if len(isbn) == 13 { + isbn13 = isbn + break + } + } + if isbn13 == "" { + // This is rarer, more expensive. + for _, isbn := range doc.ExtIDs.ISBN { + parsed := ParseIsbn(isbn) + if len(parsed) > 0 { + isbn13 = parsed[0] + break + } + } + } + if isbn13 == "" { + return nil, nil + } else { + key = []byte(isbn13) + } + return [][]byte{key, p}, nil +} + // MapperPartial works on partial documents. func MapperPartial(p []byte) (fields [][]byte, err error) { // TODO: Group by some normlized container name or identifier. |