diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-04-15 20:48:37 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-04-19 20:29:17 +0200 |
commit | ac1e5735e19595f5a03154ba059ff5132547c643 (patch) | |
tree | f399c76fcb0f89aba59add12707793c577f4558d /skate/cmd/skate-derive-key | |
parent | f338c11e6fef8314410fd9ab7ffb8f563d6f4c5e (diff) | |
download | refcat-ac1e5735e19595f5a03154ba059ff5132547c643.tar.gz refcat-ac1e5735e19595f5a03154ba059ff5132547c643.zip |
add skate-derive-key -F for fixed field
Diffstat (limited to 'skate/cmd/skate-derive-key')
-rw-r--r-- | skate/cmd/skate-derive-key/main.go | 30 |
1 file changed, 26 insertions, 4 deletions
diff --git a/skate/cmd/skate-derive-key/main.go b/skate/cmd/skate-derive-key/main.go index 653e258..b5b13f2 100644 --- a/skate/cmd/skate-derive-key/main.go +++ b/skate/cmd/skate-derive-key/main.go @@ -30,10 +30,12 @@ import ( "git.archive.org/martin/cgraph/skate" "git.archive.org/martin/cgraph/skate/parallel" + jsoniter "github.com/json-iterator/go" ) var ( - keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand, ident") + keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand") + fixedField = flag.String("F", "", "extract value from a fixed top level field, e.g. source_release_ident, ...") numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") batchSize = flag.Int("b", 50000, "batch size") verbose = flag.Bool("verbose", false, "show progress") @@ -47,16 +49,36 @@ var ( "tnorm": skate.KeyTitleNormalized, "tnysi": skate.KeyTitleNysiis, "tsand": skate.KeyTitleSandcrawler, - "ident": skate.KeySourceIdent, } keyFunc skate.IdentifierKeyFunc ok bool + json = jsoniter.ConfigCompatibleWithStandardLibrary ) func main() { flag.Parse() - if keyFunc, ok = keyOpts[*keyFuncName]; !ok { - log.Fatal("invalid key func") + if *fixedField != "" { + // We want this, because from biblioref we wanted source_release_ident, + // from refs release_ident, etc. + keyFunc = func(p []byte) (id string, key string, err error) { + var doc map[string]interface{} + if err = json.Unmarshal(p, &doc); err != nil { + return + } + v, ok := doc[*fixedField] + if !ok { + return "", "", nil + } + s, ok := v.(string) + if !ok { + return "", "", nil + } + return "", s, nil + } + } else { + if keyFunc, ok = keyOpts[*keyFuncName]; !ok { + log.Fatal("invalid key func") + } } if *logFile != "" { f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644) |