diff options
Diffstat (limited to 'skate/map.go')
-rw-r--r-- | skate/map.go | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/skate/map.go b/skate/map.go new file mode 100644 index 0000000..ae8b59f --- /dev/null +++ b/skate/map.go @@ -0,0 +1,101 @@ +package skate + +import ( + "bytes" + "fmt" + "reflect" + "runtime" + "strings" + + json "github.com/segmentio/encoding/json" +) + +type Mapper func([]byte) ([]byte, error) + +// NameOf returns name of value, e.g. the name of a function. +func NameOf(f interface{}) string { + v := reflect.ValueOf(f) + if v.Kind() == reflect.Func { + if rf := runtime.FuncForPC(v.Pointer()); rf != nil { + return rf.Name() + } + } + return v.String() +} + +// Identity mapper. +func Identity(p []byte) ([]byte, error) { + return p, nil +} + +// CreateFixedFieldFunc creates an extractor function given a json path. +// Currently only top level key is supported. +func CreateFixedFieldFunc(path string) Mapper { + f := func(p []byte) ([]byte, error) { + var doc map[string]interface{} + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } + v, ok := doc[path] + if !ok { + return nil, nil + } + switch t := v.(type) { + case string: + return []byte(fmt.Sprintf("%v\t%s", t, p)), nil + case int, int64, float32, float64: + return []byte(fmt.Sprintf("%v\t%s", t, p)), nil + default: + return nil, nil + } + } + return f +} + +func MapperTitle(p []byte) ([]byte, error) { + var doc struct { + Title string + } + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } + title := wsReplacer.Replace(strings.TrimSpace(doc.Title)) + return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil +} + +func MapperTitleNormalized(p []byte) ([]byte, error) { + var doc struct { + Title string + } + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } + title := wsReplacer.Replace(strings.TrimSpace(doc.Title)) + title = strings.ToLower(title) + title = repeatedWs.ReplaceAllString(title, " ") + title = nonWord.ReplaceAllString(title, "") + return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil +} + +func MapperTitleNysiis(p []byte) ([]byte, error) { + var doc struct { + Title string + } + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } + title := wsReplacer.Replace(strings.TrimSpace(doc.Title)) + title = NYSIIS(title) + return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil +} + +func MapperTitleSandcrawler(p []byte) ([]byte, error) { + var doc struct { + Title string + } + if err := json.Unmarshal(p, &doc); err != nil { + return nil, err + } + title := sandcrawlerSlugify(wsReplacer.Replace(strings.TrimSpace(doc.Title))) + return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil +} |