diff options
Diffstat (limited to 'skate/cmd')
| -rw-r--r-- | skate/cmd/skate-derive-key/main.go | 30 |
1 file changed, 26 insertions, 4 deletions
diff --git a/skate/cmd/skate-derive-key/main.go b/skate/cmd/skate-derive-key/main.go index 653e258..b5b13f2 100644 --- a/skate/cmd/skate-derive-key/main.go +++ b/skate/cmd/skate-derive-key/main.go @@ -30,10 +30,12 @@ import ( "git.archive.org/martin/cgraph/skate" "git.archive.org/martin/cgraph/skate/parallel" + jsoniter "github.com/json-iterator/go" ) var ( - keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand, ident") + keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand") + fixedField = flag.String("F", "", "extract value from a fixed top level field, e.g. source_release_ident, ...") numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") batchSize = flag.Int("b", 50000, "batch size") verbose = flag.Bool("verbose", false, "show progress") @@ -47,16 +49,36 @@ var ( "tnorm": skate.KeyTitleNormalized, "tnysi": skate.KeyTitleNysiis, "tsand": skate.KeyTitleSandcrawler, - "ident": skate.KeySourceIdent, } keyFunc skate.IdentifierKeyFunc ok bool + json = jsoniter.ConfigCompatibleWithStandardLibrary ) func main() { flag.Parse() - if keyFunc, ok = keyOpts[*keyFuncName]; !ok { - log.Fatal("invalid key func") + if *fixedField != "" { + // We want this, because from biblioref we wanted source_release_ident, + // from refs release_ident, etc. + keyFunc = func(p []byte) (id string, key string, err error) { + var doc map[string]interface{} + if err = json.Unmarshal(p, &doc); err != nil { + return + } + v, ok := doc[*fixedField] + if !ok { + return "", "", nil + } + s, ok := v.(string) + if !ok { + return "", "", nil + } + return "", s, nil + } + } else { + if keyFunc, ok = keyOpts[*keyFuncName]; !ok { + log.Fatal("invalid key func") + } } if *logFile != "" { f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644) |
