aboutsummaryrefslogtreecommitdiffstats
path: root/skate
diff options
context:
space:
mode:
Diffstat (limited to 'skate')
-rw-r--r--skate/cmd/skate-map/main.go1
-rw-r--r--skate/map.go34
2 files changed, 35 insertions, 0 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 3d5630b..202f8bd 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -78,6 +78,7 @@ func main() {
"cns": skate.MapperContainerNameSandcrawler,
"rcns": skate.MapperReleaseContainerName,
"vcns": skate.MapperReleaseResolvedContainerName,
+ "isbn": skate.MapperOpenLibraryReleaseNormalizedISBN,
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index b647480..17d9ed2 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -278,6 +278,40 @@ func MapperReleaseResolvedContainerName(p []byte) (fields [][]byte, err error) {
return [][]byte{key, p}, nil
}
+// MapperOpenLibraryReleaseNormalizedISBN decodes p as a JSON release document
+// and returns an ISBN-13 taken from doc.ExtIDs.ISBN as key, followed by the
+// unmodified input: [key, p].
+//
+// An entry that already is a bare 13-digit value is used as is. Only if no
+// such entry exists, each entry is handed to ParseIsbn and the first element
+// of the first non-empty result is used. If no ISBN can be determined, both
+// fields and err are nil. A JSON decoding failure is returned as err.
+func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err error) {
+	var doc Release
+	if err = json.Unmarshal(p, &doc); err != nil {
+		return nil, err
+	}
+	var isbn13 string
+	// There can be 10 and 13 variants in the data, we always want 13. A
+	// length check alone is not enough: a hyphenated ISBN-10 (e.g.
+	// "0-306-40615-2") is 13 characters long as well, so require digits.
+	for _, isbn := range doc.ExtIDs.ISBN {
+		if isBareISBN13(isbn) {
+			isbn13 = isbn
+			break
+		}
+	}
+	if isbn13 == "" {
+		// This is rarer, more expensive.
+		for _, isbn := range doc.ExtIDs.ISBN {
+			if parsed := ParseIsbn(isbn); len(parsed) > 0 {
+				isbn13 = parsed[0]
+				break
+			}
+		}
+	}
+	if isbn13 == "" {
+		return nil, nil
+	}
+	return [][]byte{[]byte(isbn13), p}, nil
+}
+
+// isBareISBN13 reports whether s consists of exactly 13 ASCII digits, i.e.
+// has the shape of an ISBN-13 written without hyphens or spaces. The check
+// digit is not verified.
+func isBareISBN13(s string) bool {
+	if len(s) != 13 {
+		return false
+	}
+	for i := 0; i < len(s); i++ {
+		if s[i] < '0' || s[i] > '9' {
+			return false
+		}
+	}
+	return true
+}
+
// MapperPartial works on partial documents.
func MapperPartial(p []byte) (fields [][]byte, err error) {
// TODO: Group by some normalized container name or identifier.