diff options
-rw-r--r-- | skate/cmd/skate-map/main.go | 3 | ||||
-rw-r--r-- | skate/map.go | 17 |
2 files changed, 17 insertions, 3 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 202f8bd..5b86250 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -73,7 +73,8 @@ func main() {
 		"tn": skate.MapperTitleNormalized,
 		"ty": skate.MapperTitleNysiis,
 		"ts": skate.MapperTitleSandcrawler,
-		"ru": skate.MapperURLFromRef,
+		"ur": skate.MapperURLFromRef,
+		"ru": skate.MapperIdentURLFromRef,
 		"cni": skate.MapperContainerName,
 		"cns": skate.MapperContainerNameSandcrawler,
 		"rcns": skate.MapperReleaseContainerName,
diff --git a/skate/map.go b/skate/map.go
index 62a347d..1a8feef 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -222,11 +222,24 @@ func MapperContainerNameSandcrawler(p []byte) (fields [][]byte, err error) {
 	return fields, nil
 }
 
-// MapperURLFromRef extracts the (work ident, release ident, url, doc).
+// MapperURLFromRef extracts the (url, doc).
+func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+	var ref Ref
+	if err = json.Unmarshal(p, &ref); err != nil {
+		return nil, err
+	}
+	fields = [][]byte{
+		[]byte(ref.Biblio.Url),
+		p,
+	}
+	return fields, nil
+}
+
+// MapperIdentURLFromRef extracts the (work ident, release ident, url, doc).
 // Previously: parallel -j 16 --block 100M --pipe "jq -rc '[.work_ident,
 // .release_ident, .biblio.url?] | @tsv'" ...
 // This implementation seems slightly faster than jq and parallel.
-func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+func MapperIdentURLFromRef(p []byte) (fields [][]byte, err error) {
 	var ref Ref
 	if err = json.Unmarshal(p, &ref); err != nil {
 		return nil, err