aboutsummaryrefslogtreecommitdiffstats
path: root/skate
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-05-06 20:13:30 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-05-06 20:13:30 +0200
commit6e100b23bf846f36025092a83d433e6cf4f0a9b6 (patch)
tree015948b142aff27b724285b5dabc5e724f178d4d /skate
parent6c327acbf5799dde9c153843ac3ba1471e88317c (diff)
downloadrefcat-6e100b23bf846f36025092a83d433e6cf4f0a9b6.tar.gz
refcat-6e100b23bf846f36025092a83d433e6cf4f0a9b6.zip
add a url extractor mapper
Diffstat (limited to 'skate')
-rw-r--r--skate/cmd/skate-map/main.go1
-rw-r--r--skate/map.go17
2 files changed, 18 insertions, 0 deletions
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 227acf2..9bf2d14 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -72,6 +72,7 @@ func main() {
"tn": skate.MapperTitleNormalized,
"ty": skate.MapperTitleNysiis,
"ts": skate.MapperTitleSandcrawler,
+ "ru": skate.MapperURLFromRef,
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index d6e37be..459558d 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -181,6 +181,23 @@ func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) {
return fields, nil
}
+// MapperURLFromRef decodes p as a Ref and returns work ident, release ident,
+// biblio URL and the raw document p. Previously: parallel -j 16 --block 100M
+// --pipe "jq -rc '[.work_ident, .release_ident, .biblio.url?] | @tsv'" ...
+func MapperURLFromRef(p []byte) (fields [][]byte, err error) {
+	var ref Ref
+	if err := json.Unmarshal(p, &ref); err != nil {
+		return nil, err
+	}
+	fields = [][]byte{ // plain "=": fields is a named result, ":=" would not compile
+		[]byte(ref.WorkIdent),
+		[]byte(ref.ReleaseIdent),
+		[]byte(ref.Biblio.Url),
+		p, // the unmodified input document is passed through as the last field
+	}
+	return fields, nil
+}
+
// MapperPartial works on partial documents.
func MapperPartial(p []byte) (fields [][]byte, err error) {
// XXX: slugify authors, how to compare two author strings? How do these