about summary refs log tree commit diff stats
path: root/skate/cmd/skate-derive-key
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-15 20:48:37 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-19 20:29:17 +0200
commit ac1e5735e19595f5a03154ba059ff5132547c643 (patch)
tree f399c76fcb0f89aba59add12707793c577f4558d /skate/cmd/skate-derive-key
parent f338c11e6fef8314410fd9ab7ffb8f563d6f4c5e (diff)
download refcat-ac1e5735e19595f5a03154ba059ff5132547c643.tar.gz
refcat-ac1e5735e19595f5a03154ba059ff5132547c643.zip
add skate-derive-key -F for fixed field
Diffstat (limited to 'skate/cmd/skate-derive-key')
-rw-r--r-- skate/cmd/skate-derive-key/main.go 30
1 files changed, 26 insertions, 4 deletions
diff --git a/skate/cmd/skate-derive-key/main.go b/skate/cmd/skate-derive-key/main.go
index 653e258..b5b13f2 100644
--- a/skate/cmd/skate-derive-key/main.go
+++ b/skate/cmd/skate-derive-key/main.go
@@ -30,10 +30,12 @@ import (
"git.archive.org/martin/cgraph/skate"
"git.archive.org/martin/cgraph/skate/parallel"
+ jsoniter "github.com/json-iterator/go"
)
var (
- keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand, ident")
+ keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand")
+ fixedField = flag.String("F", "", "extract value from a fixed top level field, e.g. source_release_ident, ...")
numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
batchSize = flag.Int("b", 50000, "batch size")
verbose = flag.Bool("verbose", false, "show progress")
@@ -47,16 +49,36 @@ var (
"tnorm": skate.KeyTitleNormalized,
"tnysi": skate.KeyTitleNysiis,
"tsand": skate.KeyTitleSandcrawler,
- "ident": skate.KeySourceIdent,
}
keyFunc skate.IdentifierKeyFunc
ok bool
+ json = jsoniter.ConfigCompatibleWithStandardLibrary
)
func main() {
flag.Parse()
- if keyFunc, ok = keyOpts[*keyFuncName]; !ok {
- log.Fatal("invalid key func")
+ if *fixedField != "" {
+ // We want this, because from biblioref we wanted source_release_ident,
+ // from refs release_ident, etc.
+ keyFunc = func(p []byte) (id string, key string, err error) {
+ var doc map[string]interface{}
+ if err = json.Unmarshal(p, &doc); err != nil {
+ return
+ }
+ v, ok := doc[*fixedField]
+ if !ok {
+ return "", "", nil
+ }
+ s, ok := v.(string)
+ if !ok {
+ return "", "", nil
+ }
+ return "", s, nil
+ }
+ } else {
+ if keyFunc, ok = keyOpts[*keyFuncName]; !ok {
+ log.Fatal("invalid key func")
+ }
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)