about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
Diffstat (limited to 'skate')
-rw-r--r-- skate/cmd/skate-map/main.go 1
-rw-r--r-- skate/map.go 17
2 files changed, 18 insertions, 0 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 227acf2..9bf2d14 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -72,6 +72,7 @@ func main() {
"tn": skate.MapperTitleNormalized,
"ty": skate.MapperTitleNysiis,
"ts": skate.MapperTitleSandcrawler,
+ "ru": skate.MapperURLFromRef,
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index d6e37be..459558d 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -181,6 +181,23 @@ func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) {
return fields, nil
}
+// MapperURLFromRef unmarshals p into a Ref and returns, in order: work ident,
+// release ident, biblio URL and p itself; a JSON error yields (nil, err). Was:
+// parallel -j 16 --block 100M --pipe "jq -rc '[.work_ident, .release_ident, .biblio.url?] | @tsv'" ...
+func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+	var ref Ref
+	if err := json.Unmarshal(p, &ref); err != nil {
+		return nil, err
+	}
+	fields = [][]byte{ // named result: assign with "=", ":=" would not compile here
+		[]byte(ref.WorkIdent),
+		[]byte(ref.ReleaseIdent),
+		[]byte(ref.Biblio.Url),
+		p, // the input document, passed through unchanged as the last field
+	}
+	return fields, nil
+}
+
// MapperPartial works on partial documents.
func MapperPartial(p []byte) (fields [][]byte, err error) {
// XXX: slugify authors, how to compare two author strings? How do these