diff options
Diffstat (limited to 'skate/map.go')
-rw-r--r-- | skate/map.go | 17 |
1 files changed, 15 insertions, 2 deletions
diff --git a/skate/map.go b/skate/map.go
index 62a347d..1a8feef 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -222,11 +222,24 @@ func MapperContainerNameSandcrawler(p []byte) (fields [][]byte, err error) {
 	return fields, nil
 }
 
-// MapperURLFromRef extracts the (work ident, release ident, url, doc).
+// MapperURLFromRef extracts the (url, doc).
+func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+	var ref Ref
+	if err = json.Unmarshal(p, &ref); err != nil {
+		return nil, err
+	}
+	fields = [][]byte{
+		[]byte(ref.Biblio.Url),
+		p,
+	}
+	return fields, nil
+}
+
+// MapperIdentURLFromRef extracts the (work ident, release ident, url, doc).
 // Previously: parallel -j 16 --block 100M --pipe "jq -rc '[.work_ident,
 // .release_ident, .biblio.url?] | @tsv'" ...
 // This implementation seems slightly faster than jq and parallel.
-func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+func MapperIdentURLFromRef(p []byte) (fields [][]byte, err error) {
 	var ref Ref
 	if err = json.Unmarshal(p, &ref); err != nil {
 		return nil, err