diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-06-07 18:54:52 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-06-07 18:54:52 +0200 |
commit | aa16994de8efc3c49fe79a9470ef1da22330bd67 (patch) | |
tree | f4cdd6f8ba4072a9b0cd86dc61ca4381353f3c30 | |
parent | 037eb76dc34284b9946be7f805618b58b2e2a761 (diff) | |
download | refcat-aa16994de8efc3c49fe79a9470ef1da22330bd67.tar.gz refcat-aa16994de8efc3c49fe79a9470ef1da22330bd67.zip |
add mapper: isbn, normalized isbn13 from ext_ids.isbn
-rw-r--r-- | skate/cmd/skate-map/main.go | 1 | ||||
-rw-r--r-- | skate/map.go | 34 |
2 files changed, 35 insertions, 0 deletions
```diff
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 3d5630b..202f8bd 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -78,6 +78,7 @@ func main() {
 		"cns": skate.MapperContainerNameSandcrawler,
 		"rcns": skate.MapperReleaseContainerName,
 		"vcns": skate.MapperReleaseResolvedContainerName,
+		"isbn": skate.MapperOpenLibraryReleaseNormalizedISBN,
 	}
 	if *logFile != "" {
 		f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index b647480..17d9ed2 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -278,6 +278,40 @@ func MapperReleaseResolvedContainerName(p []byte) (fields [][]byte, err error) {
 	return [][]byte{key, p}, nil
 }
 
+func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err error) {
+	var (
+		doc Release
+		key []byte
+		isbn13 string
+	)
+	if err := json.Unmarshal(p, &doc); err != nil {
+		return nil, err
+	}
+	// There can be 10 and 13 variants in the data, we always want 13.
+	for _, isbn := range doc.ExtIDs.ISBN {
+		if len(isbn) == 13 {
+			isbn13 = isbn
+			break
+		}
+	}
+	if isbn13 == "" {
+		// This is rarer, more expensive.
+		for _, isbn := range doc.ExtIDs.ISBN {
+			parsed := ParseIsbn(isbn)
+			if len(parsed) > 0 {
+				isbn13 = parsed[0]
+				break
+			}
+		}
+	}
+	if isbn13 == "" {
+		return nil, nil
+	} else {
+		key = []byte(isbn13)
+	}
+	return [][]byte{key, p}, nil
+}
+
 // MapperPartial works on partial documents.
 func MapperPartial(p []byte) (fields [][]byte, err error) {
 	// TODO: Group by some normlized container name or identifier.
```