aboutsummaryrefslogtreecommitdiffstats
path: root/skate/map.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/map.go')
-rw-r--r--skate/map.go101
1 files changed, 101 insertions, 0 deletions
diff --git a/skate/map.go b/skate/map.go
new file mode 100644
index 0000000..ae8b59f
--- /dev/null
+++ b/skate/map.go
@@ -0,0 +1,101 @@
+package skate
+
+import (
+ "bytes"
+ "fmt"
+ "reflect"
+ "runtime"
+ "strings"
+
+ json "github.com/segmentio/encoding/json"
+)
+
// Mapper is the common signature of the mapping functions in this file: it
// receives one raw document and returns the derived output bytes, or an error.
type Mapper func(p []byte) ([]byte, error)
+
// NameOf returns a printable name for f. When f is a function whose entry
// point the runtime can resolve, the result is the runtime's name for it. In
// every other case (f is not a function, or no function information is found)
// the result is whatever reflect.Value.String reports for f.
func NameOf(f interface{}) string {
	val := reflect.ValueOf(f)
	if val.Kind() != reflect.Func {
		return val.String()
	}
	fn := runtime.FuncForPC(val.Pointer())
	if fn == nil {
		return val.String()
	}
	return fn.Name()
}
+
// Identity is the no-op mapper: it hands back the very slice it was given
// (no copy is made) together with a nil error.
func Identity(p []byte) ([]byte, error) {
	out := p
	return out, nil
}
+
+// CreateFixedFieldFunc creates an extractor function given a json path.
+// Currently only top level key is supported.
+func CreateFixedFieldFunc(path string) Mapper {
+ f := func(p []byte) ([]byte, error) {
+ var doc map[string]interface{}
+ if err := json.Unmarshal(p, &doc); err != nil {
+ return nil, err
+ }
+ v, ok := doc[path]
+ if !ok {
+ return nil, nil
+ }
+ switch t := v.(type) {
+ case string:
+ return []byte(fmt.Sprintf("%v\t%s", t, p)), nil
+ case int, int64, float32, float64:
+ return []byte(fmt.Sprintf("%v\t%s", t, p)), nil
+ default:
+ return nil, nil
+ }
+ }
+ return f
+}
+
+func MapperTitle(p []byte) ([]byte, error) {
+ var doc struct {
+ Title string
+ }
+ if err := json.Unmarshal(p, &doc); err != nil {
+ return nil, err
+ }
+ title := wsReplacer.Replace(strings.TrimSpace(doc.Title))
+ return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil
+}
+
+func MapperTitleNormalized(p []byte) ([]byte, error) {
+ var doc struct {
+ Title string
+ }
+ if err := json.Unmarshal(p, &doc); err != nil {
+ return nil, err
+ }
+ title := wsReplacer.Replace(strings.TrimSpace(doc.Title))
+ title = strings.ToLower(title)
+ title = repeatedWs.ReplaceAllString(title, " ")
+ title = nonWord.ReplaceAllString(title, "")
+ return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil
+}
+
+func MapperTitleNysiis(p []byte) ([]byte, error) {
+ var doc struct {
+ Title string
+ }
+ if err := json.Unmarshal(p, &doc); err != nil {
+ return nil, err
+ }
+ title := wsReplacer.Replace(strings.TrimSpace(doc.Title))
+ title = NYSIIS(title)
+ return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil
+}
+
+func MapperTitleSandcrawler(p []byte) ([]byte, error) {
+ var doc struct {
+ Title string
+ }
+ if err := json.Unmarshal(p, &doc); err != nil {
+ return nil, err
+ }
+ title := sandcrawlerSlugify(wsReplacer.Replace(strings.TrimSpace(doc.Title)))
+ return bytes.Join([][]byte{[]byte(title), p}, []byte("\t")), nil
+}