aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-06-07 18:54:52 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-06-07 18:54:52 +0200
commitaa16994de8efc3c49fe79a9470ef1da22330bd67 (patch)
treef4cdd6f8ba4072a9b0cd86dc61ca4381353f3c30
parent037eb76dc34284b9946be7f805618b58b2e2a761 (diff)
downloadrefcat-aa16994de8efc3c49fe79a9470ef1da22330bd67.tar.gz
refcat-aa16994de8efc3c49fe79a9470ef1da22330bd67.zip
add mapper: isbn, normalized isbn13 from ext_ids.isbn
-rw-r--r--skate/cmd/skate-map/main.go1
-rw-r--r--skate/map.go34
2 files changed, 35 insertions, 0 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 3d5630b..202f8bd 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -78,6 +78,7 @@ func main() {
"cns": skate.MapperContainerNameSandcrawler,
"rcns": skate.MapperReleaseContainerName,
"vcns": skate.MapperReleaseResolvedContainerName,
+ "isbn": skate.MapperOpenLibraryReleaseNormalizedISBN,
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index b647480..17d9ed2 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -278,6 +278,40 @@ func MapperReleaseResolvedContainerName(p []byte) (fields [][]byte, err error) {
return [][]byte{key, p}, nil
}
+func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err error) {
+ var (
+ doc Release
+ key []byte
+ isbn13 string
+ )
+ if err := json.Unmarshal(p, &doc); err != nil {
+ return nil, err
+ }
+ // There can be 10 and 13 variants in the data, we always want 13.
+ for _, isbn := range doc.ExtIDs.ISBN {
+ if len(isbn) == 13 {
+ isbn13 = isbn
+ break
+ }
+ }
+ if isbn13 == "" {
+ // This is rarer, more expensive.
+ for _, isbn := range doc.ExtIDs.ISBN {
+ parsed := ParseIsbn(isbn)
+ if len(parsed) > 0 {
+ isbn13 = parsed[0]
+ break
+ }
+ }
+ }
+ if isbn13 == "" {
+ return nil, nil
+ } else {
+ key = []byte(isbn13)
+ }
+ return [][]byte{key, p}, nil
+}
+
// MapperPartial works on partial documents.
func MapperPartial(p []byte) (fields [][]byte, err error) {
// TODO: Group by some normalized container name or identifier.