about summary refs log tree commit diff stats
path: root/skate
diff options
context:
space:
mode:
Diffstat (limited to 'skate')
-rw-r--r-- skate/cluster.go 25
-rw-r--r-- skate/cmd/skate-derive-key/main.go 22
2 files changed, 27 insertions, 20 deletions
diff --git a/skate/cluster.go b/skate/cluster.go
index 7fc4e1b..3421a0b 100644
--- a/skate/cluster.go
+++ b/skate/cluster.go
@@ -1,6 +1,7 @@
package skate
import (
+ "fmt"
"regexp"
"strings"
@@ -109,6 +110,30 @@ func KeyTitleSandcrawler(p []byte) (ident string, key string, err error) {
return ident, sandcrawlerSlugify(key), nil
}
+// CreateFixedFieldFunc returns an IdentifierKeyFunc that decodes its input as a JSON object and uses the value stored under path as the key; ident is always returned empty.
+// Only a top-level key is supported: path is used verbatim as a single map key and is not split into nested segments.
+func CreateFixedFieldFunc(path string) IdentifierKeyFunc {
+ f := func(p []byte) (ident string, key string, err error) {
+ var doc map[string]interface{}
+ if err = json.Unmarshal(p, &doc); err != nil {
+ return // naked return: propagates the decode error with empty ident and key
+ }
+ v, ok := doc[path]
+ if !ok {
+ return "", "", nil // field absent: not an error, the key is simply empty
+ }
+ switch t := v.(type) {
+ case string:
+ return "", t, nil // string values are used as the key unchanged
+ case int, int64, float32, float64: // NOTE(review): an encoding/json-compatible decoder yields float64 for JSON numbers in interface{}, so the other cases are presumably unreachable — confirm against the json package this file imports
+ return "", fmt.Sprintf("%v", t), nil // NOTE(review): %v formats float64 like %g, so 1000000 renders as "1e+06" — confirm this is acceptable for keys
+ default:
+ return "", "", nil // any other value type yields an empty key without an error
+ }
+ }
+ return f
+}
+
// sandcrawlerSlugify normalizes a string.
func sandcrawlerSlugify(s string) string {
slug := strings.ToLower(strings.TrimSpace(s))
diff --git a/skate/cmd/skate-derive-key/main.go b/skate/cmd/skate-derive-key/main.go
index 61c79e1..05e1261 100644
--- a/skate/cmd/skate-derive-key/main.go
+++ b/skate/cmd/skate-derive-key/main.go
@@ -29,7 +29,6 @@ import (
"git.archive.org/martin/cgraph/skate"
"git.archive.org/martin/cgraph/skate/parallel"
- json "github.com/segmentio/encoding/json"
)
var (
@@ -40,7 +39,7 @@ var (
verbose = flag.Bool("verbose", false, "show progress")
bestEffort = flag.Bool("B", false, "best effort")
logFile = flag.String("log", "", "log filename")
- skipEmptyKeys = flag.Bool("skip-empty-keys", false, "omit docs without keys")
+ skipEmptyKeys = flag.Bool("skip-empty-keys", false, "omit docs with empty keys")
wsReplacer = strings.NewReplacer("\t", "", "\n", "")
keyOpts = map[string]skate.IdentifierKeyFunc{
@@ -58,24 +57,7 @@ func main() {
if *fixedField != "" {
// We want this, because from biblioref we wanted source_release_ident,
// from refs release_ident, etc.
- keyFunc = func(p []byte) (id string, key string, err error) {
- var doc map[string]interface{}
- if err = json.Unmarshal(p, &doc); err != nil {
- return
- }
- v, ok := doc[*fixedField]
- if !ok {
- return "", "", nil
- }
- switch t := v.(type) {
- case string:
- return "", t, nil
- case int, int64, float32, float64:
- return "", fmt.Sprintf("%v", t), nil
- default:
- return "", "", nil
- }
- }
+ keyFunc = skate.CreateFixedFieldFunc(*fixedField)
} else {
if keyFunc, ok = keyOpts[*keyFuncName]; !ok {
log.Fatal("invalid key func")