diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-04-15 01:21:00 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-04-19 20:29:17 +0200 |
commit | 88817015a69d4e24c0027d73844866d622172eec (patch) | |
tree | 195f49eaf126ca219bd03b1967d2037964592c51 /skate | |
parent | aa37b39f72eda335a6bb24bb521464b548cf6627 (diff) | |
download | refcat-88817015a69d4e24c0027d73844866d622172eec.tar.gz refcat-88817015a69d4e24c0027d73844866d622172eec.zip |
skate: try source ident as key
Diffstat (limited to 'skate')
-rw-r--r-- | skate/cluster.go | 9 | ||||
-rw-r--r-- | skate/cmd/skate-derive-key/main.go | 3 |
2 files changed, 11 insertions, 1 deletions
```diff
diff --git a/skate/cluster.go b/skate/cluster.go
index bec8154..954d971 100644
--- a/skate/cluster.go
+++ b/skate/cluster.go
@@ -110,6 +110,15 @@ func KeyTitleSandcrawler(p []byte) (ident string, key string, err error) {
 	return ident, sandcrawlerSlugify(key), nil
 }
 
+// KeySourceIdent extracts the source ident.
+func KeySourceIdent(p []byte) (ident string, key string, err error) {
+	var doc IdentTitleDoc
+	if err = json.Unmarshal(p, &doc); err != nil {
+		return doc.Ident, doc.Ident, err
+	}
+	return doc.Ident, doc.Ident, nil
+}
+
 // sandcrawlerSlugify normalizes a string.
 func sandcrawlerSlugify(s string) string {
 	slug := strings.ToLower(strings.TrimSpace(s))
diff --git a/skate/cmd/skate-derive-key/main.go b/skate/cmd/skate-derive-key/main.go
index 2375a73..653e258 100644
--- a/skate/cmd/skate-derive-key/main.go
+++ b/skate/cmd/skate-derive-key/main.go
@@ -33,7 +33,7 @@ import (
 )
 
 var (
-	keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand")
+	keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand, ident")
 	numWorkers  = flag.Int("w", runtime.NumCPU(), "number of workers")
 	batchSize   = flag.Int("b", 50000, "batch size")
 	verbose     = flag.Bool("verbose", false, "show progress")
@@ -47,6 +47,7 @@ var (
 		"tnorm": skate.KeyTitleNormalized,
 		"tnysi": skate.KeyTitleNysiis,
 		"tsand": skate.KeyTitleSandcrawler,
+		"ident": skate.KeySourceIdent,
 	}
 	keyFunc skate.IdentifierKeyFunc
 	ok      bool
```