aboutsummaryrefslogtreecommitdiffstats
path: root/skate/map.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/map.go')
-rw-r--r-- skate/map.go 17
1 file changed, 15 insertions, 2 deletions
diff --git a/skate/map.go b/skate/map.go
index 62a347d..1a8feef 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -222,11 +222,24 @@ func MapperContainerNameSandcrawler(p []byte) (fields [][]byte, err error) {
return fields, nil
}
-// MapperURLFromRef extracts the (work ident, release ident, url, doc).
+// MapperURLFromRef extracts the (url, doc) fields from a JSON encoded Ref; url is taken from biblio.url and doc is the unmodified input.
+func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+ var ref Ref
+ if err = json.Unmarshal(p, &ref); err != nil {
+ return nil, err
+ }
+ fields = [][]byte{
+ []byte(ref.Biblio.Url),
+ p,
+ }
+ return fields, nil
+}
+
+// MapperIdentURLFromRef extracts the (work ident, release ident, url, doc).
// Previously: parallel -j 16 --block 100M --pipe "jq -rc '[.work_ident,
// .release_ident, .biblio.url?] | @tsv'" ...
// This implementation seems slightly faster than jq and parallel.
-func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+func MapperIdentURLFromRef(p []byte) (fields [][]byte, err error) {
var ref Ref
if err = json.Unmarshal(p, &ref); err != nil {
return nil, err