aboutsummaryrefslogtreecommitdiffstats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-15 17:51:12 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-15 17:51:12 +0200
commit d1c3672fd878ea365eb87de86da8351dad94e2dc (patch)
tree 368132fb07a71a9be0e9de63468a9b022b34d4ff /skate
parent f0a3f1f32fafe514940ffe44f2373a534a3bbfc4 (diff)
download refcat-d1c3672fd878ea365eb87de86da8351dad94e2dc.tar.gz
refcat-d1c3672fd878ea365eb87de86da8351dad94e2dc.zip
mapper: add cdxu
Diffstat (limited to 'skate')
-rw-r--r-- skate/cmd/skate-map/main.go 1
-rw-r--r-- skate/map.go 21
2 files changed, 22 insertions, 0 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 5b86250..f87c02f 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -80,6 +80,7 @@ func main() {
"rcns": skate.MapperReleaseContainerName,
"vcns": skate.MapperReleaseResolvedContainerName,
"isbn": skate.MapperOpenLibraryReleaseNormalizedISBN,
+ "cdxu": skate.MapperCdxSummary,
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index 1a8feef..ef9c018 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -44,6 +44,17 @@ type PartialRef struct {
Year string `json:"release_year"`
}
+// cdxSummary is an ad-hoc CDX summary format, created currently by skate-cdx-lookup. It is the JSON decode target of MapperCdxSummary.
+type cdxSummary struct {
+ Line string `json:"line"` // emitted by MapperCdxSummary as the key field
+ NumRows int64 `json:"numRows"` // NOTE(review): presumably the number of CDX rows summarized; confirm against skate-cdx-lookup
+ Summary struct {
+ Delta int64 `json:"delta"` // NOTE(review): meaning and unit are not visible here; confirm against skate-cdx-lookup
+ Last string `json:"last"` // NOTE(review): presumably refers to the most recent capture; format not visible here
+ Ok string `json:"ok"` // NOTE(review): presumably refers to a successful capture; format not visible here
+ } `json:"summary"`
+}
+
// Mapper maps a blob to an arbitrary number of fields, e.g. for (key,
// doc). We want fields, but we do not want to bake in TSV into each function.
type Mapper func([]byte) ([][]byte, error)
@@ -316,6 +327,16 @@ func MapperOpenLibraryReleaseNormalizedISBN(p []byte) (fields [][]byte, err erro
return [][]byte{key, p}, nil
}
+func MapperCdxSummary(p []byte) (fields [][]byte, err error) { // MapperCdxSummary decodes p as a JSON cdxSummary and returns the fields (line, p).
+ var (
+ cdx cdxSummary
+ )
+ if err := json.Unmarshal(p, &cdx); err != nil {
+ return nil, err // decode error is returned unwrapped, with no fields
+ }
+ return [][]byte{[]byte(cdx.Line), p}, nil // key is the "line" value (empty string if the key is absent); p is passed through unmodified
+}
+
// MapperPartial works on partial documents.
func MapperPartial(p []byte) (fields [][]byte, err error) {
// TODO: Group by some normalized container name or identifier.