From d1c3672fd878ea365eb87de86da8351dad94e2dc Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 15 Jul 2021 17:51:12 +0200 Subject: mapper: add cdxu --- skate/cmd/skate-map/main.go | 1 + skate/map.go | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'skate') diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go index 5b86250..f87c02f 100644 --- a/skate/cmd/skate-map/main.go +++ b/skate/cmd/skate-map/main.go @@ -80,6 +80,7 @@ func main() { "rcns": skate.MapperReleaseContainerName, "vcns": skate.MapperReleaseResolvedContainerName, "isbn": skate.MapperOpenLibraryReleaseNormalizedISBN, + "cdxu": skate.MapperCdxSummary, } if *logFile != "" { f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644) diff --git a/skate/map.go b/skate/map.go index 1a8feef..ef9c018 100644 --- a/skate/map.go +++ b/skate/map.go @@ -44,6 +44,17 @@ type PartialRef struct { Year string `json:"release_year"` } +// cdxSummary is an ad-hoc CDX summary format, currently created by skate-cdx-lookup. +type cdxSummary struct { + Line string `json:"line"` + NumRows int64 `json:"numRows"` + Summary struct { + Delta int64 `json:"delta"` + Last string `json:"last"` + Ok string `json:"ok"` + } `json:"summary"` +} + // Mapper maps a blob to an arbitrary number of fields, e.g. for (key, // doc). We want fields, but we do not want to bake in TSV into each function. type Mapper func([]byte) ([][]byte, error) @@ -316,6 +327,16 @@ func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err erro return [][]byte{key, p}, nil } +func MapperCdxSummary(p []byte) (fields [][]byte, err error) { + var ( + cdx cdxSummary + ) + if err := json.Unmarshal(p, &cdx); err != nil { + return nil, err + } + return [][]byte{[]byte(cdx.Line), p}, nil +} + // MapperPartial works on partial documents. func MapperPartial(p []byte) (fields [][]byte, err error) { // TODO: Group by some normlized container name or identifier. -- cgit v1.2.3