diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-15 17:51:12 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-15 17:51:12 +0200 |
commit | d1c3672fd878ea365eb87de86da8351dad94e2dc (patch) | |
tree | 368132fb07a71a9be0e9de63468a9b022b34d4ff | |
parent | f0a3f1f32fafe514940ffe44f2373a534a3bbfc4 (diff) | |
download | refcat-d1c3672fd878ea365eb87de86da8351dad94e2dc.tar.gz refcat-d1c3672fd878ea365eb87de86da8351dad94e2dc.zip |
mapper: add cdxu
-rw-r--r-- | skate/cmd/skate-map/main.go | 1 | ||||
-rw-r--r-- | skate/map.go | 21 |
2 files changed, 22 insertions, 0 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go index 5b86250..f87c02f 100644 --- a/skate/cmd/skate-map/main.go +++ b/skate/cmd/skate-map/main.go @@ -80,6 +80,7 @@ func main() { "rcns": skate.MapperReleaseContainerName, "vcns": skate.MapperReleaseResolvedContainerName, "isbn": skate.MapperOpenLibraryReleaseNormalizedISBN, + "cdxu": skate.MapperCdxSummary, } if *logFile != "" { f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644) diff --git a/skate/map.go b/skate/map.go index 1a8feef..ef9c018 100644 --- a/skate/map.go +++ b/skate/map.go @@ -44,6 +44,17 @@ type PartialRef struct { Year string `json:"release_year"` } +// cdxSummary is an ad-hoc CDX summary format, created currently by skate-cdx-lookup. +type cdxSummary struct { + Line string `json:"line"` + NumRows int64 `json:"numRows"` + Summary struct { + Delta int64 `json:"delta"` + Last string `json:"last"` + Ok string `json:"ok"` + } `json:"summary"` +} + // Mapper maps a blob to an arbitrary number of fields, e.g. for (key, // doc). We want fields, but we do not want to bake in TSV into each function. type Mapper func([]byte) ([][]byte, error) @@ -316,6 +327,16 @@ func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err erro return [][]byte{key, p}, nil } +func MapperCdxSummary(p []byte) (fields [][]byte, err error) { + var ( + cdx cdxSummary + ) + if err := json.Unmarshal(p, &cdx); err != nil { + return nil, err + } + return [][]byte{[]byte(cdx.Line), p}, nil +} + // MapperPartial works on partial documents. func MapperPartial(p []byte) (fields [][]byte, err error) { // TODO: Group by some normalized container name or identifier. |