aboutsummaryrefslogtreecommitdiffstats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-15 01:21:00 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-19 20:29:17 +0200
commit 88817015a69d4e24c0027d73844866d622172eec (patch)
tree 195f49eaf126ca219bd03b1967d2037964592c51 /skate
parent aa37b39f72eda335a6bb24bb521464b548cf6627 (diff)
download refcat-88817015a69d4e24c0027d73844866d622172eec.tar.gz
refcat-88817015a69d4e24c0027d73844866d622172eec.zip
skate: try source ident as key
Diffstat (limited to 'skate')
-rw-r--r-- skate/cluster.go 9
-rw-r--r-- skate/cmd/skate-derive-key/main.go 3
2 files changed, 11 insertions, 1 deletions
diff --git a/skate/cluster.go b/skate/cluster.go
index bec8154..954d971 100644
--- a/skate/cluster.go
+++ b/skate/cluster.go
@@ -110,6 +110,15 @@ func KeyTitleSandcrawler(p []byte) (ident string, key string, err error) {
return ident, sandcrawlerSlugify(key), nil
}
+// KeySourceIdent decodes p as an IdentTitleDoc and returns its Ident as both ident and key; on a decode error it returns empty strings and the error.
+func KeySourceIdent(p []byte) (ident string, key string, err error) {
+	var doc IdentTitleDoc
+	if err = json.Unmarshal(p, &doc); err != nil {
+		return "", "", err // doc may be partially decoded here; do not hand it to callers
+	}
+	return doc.Ident, doc.Ident, nil
+}
+
// sandcrawlerSlugify normalizes a string.
func sandcrawlerSlugify(s string) string {
slug := strings.ToLower(strings.TrimSpace(s))
diff --git a/skate/cmd/skate-derive-key/main.go b/skate/cmd/skate-derive-key/main.go
index 2375a73..653e258 100644
--- a/skate/cmd/skate-derive-key/main.go
+++ b/skate/cmd/skate-derive-key/main.go
@@ -33,7 +33,7 @@ import (
)
var (
- keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand")
+ keyFuncName = flag.String("f", "tsand", "key function name, other: title, tnorm, tnysi, tsand, ident")
numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
batchSize = flag.Int("b", 50000, "batch size")
verbose = flag.Bool("verbose", false, "show progress")
@@ -47,6 +47,7 @@ var (
"tnorm": skate.KeyTitleNormalized,
"tnysi": skate.KeyTitleNysiis,
"tsand": skate.KeyTitleSandcrawler,
+ "ident": skate.KeySourceIdent,
}
keyFunc skate.IdentifierKeyFunc
ok bool