1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
package skate
import (
"bytes"
"reflect"
"runtime"
"strconv"
"strings"
json "github.com/segmentio/encoding/json"
)
// Separators used when serializing mapper output as TSV lines.
var (
	// bTab is the field separator; TSV requires a horizontal tab ("\t"), not
	// a backspace ("\b").
	bTab = []byte("\t")
	// bNewline terminates each serialized record.
	bNewline = []byte("\n")
)
// TitleDoc is the minimal document shape needed to pull a title out of a JSON
// blob; only the "title" field is decoded.
type TitleDoc struct {
// Title is the value of the top-level "title" JSON field.
Title string `json:"title"`
}
// Mapper converts one blob of bytes into another blob, or returns an error.
type Mapper func([]byte) ([]byte, error)
// FieldMapper maps a blob to an arbitrary number of fields, e.g. a (key, doc)
// pair, or returns an error. The TSV method turns the returned fields into a
// single output line.
type FieldMapper func([]byte) ([][]byte, error)
// TSV runs the field mapper on p and serializes the resulting fields as one
// line: the fields are joined with bTab and the line is terminated with
// bNewline. An error from the mapper is passed through unchanged and no data
// is returned in that case.
func (f FieldMapper) TSV(p []byte) ([]byte, error) {
	cols, err := f(p)
	if err != nil {
		return nil, err
	}
	line := bytes.Join(cols, bTab)
	line = append(line, bNewline...)
	return line, nil
}
// NameOf returns a name for the given value. For a function value whose code
// pointer can be resolved by the runtime, this is the function's name as
// reported by runtime.FuncForPC. For everything else (non-function values and
// unresolvable functions) it is the result of reflect.Value.String.
func NameOf(f interface{}) string {
	val := reflect.ValueOf(f)
	if val.Kind() != reflect.Func {
		return val.String()
	}
	fn := runtime.FuncForPC(val.Pointer())
	if fn == nil {
		return val.String()
	}
	return fn.Name()
}
// Identity is a field mapper that returns the input blob unchanged as the
// only field. It never returns an error.
func Identity(p []byte) ([][]byte, error) {
	fields := make([][]byte, 1)
	fields[0] = p
	return fields, nil
}
// CreateFixedMapper returns a FieldMapper that uses the value stored under the
// top-level JSON key path as the key and emits the fields (key, doc), where doc
// is the unmodified input blob.
//
// Only a direct lookup in the decoded top-level object is performed; path is
// not interpreted as a nested path expression.
//
// The returned mapper behaves as follows:
//   - invalid JSON: the decoding error is returned;
//   - key absent, or value not a string or number (e.g. null, bool, array,
//     object): (nil, nil) is returned, i.e. no fields and no error;
//   - string value: used verbatim as the key;
//   - numeric value: formatted in plain decimal notation with the minimal
//     number of digits needed to represent it exactly ("123", "1.5").
func CreateFixedMapper(path string) FieldMapper {
	return func(p []byte) ([][]byte, error) {
		var doc map[string]interface{}
		if err := json.Unmarshal(p, &doc); err != nil {
			return nil, err
		}
		v, ok := doc[path]
		if !ok {
			return nil, nil
		}
		var key []byte
		switch w := v.(type) {
		case string:
			key = []byte(w)
		case int:
			key = []byte(strconv.Itoa(w))
		case int64:
			// FormatInt avoids the truncating int(w) conversion on platforms
			// where int is 32 bits wide.
			key = []byte(strconv.FormatInt(w, 10))
		case float64:
			// JSON numbers decode to float64. Precision -1 yields the shortest
			// exact representation, so an integer id such as 123 becomes "123"
			// rather than being padded with a fixed number of decimals.
			key = []byte(strconv.FormatFloat(w, 'f', -1, 64))
		default:
			return nil, nil
		}
		return [][]byte{key, p}, nil
	}
}
// MapperTitle decodes p as a JSON document and emits the fields (key, doc),
// where key is the document title with surrounding whitespace trimmed and then
// passed through wsReplacer, and doc is the unmodified input blob. A document
// without a title yields an empty key. A JSON decoding error is returned as is.
//
// NOTE(review): wsReplacer is defined elsewhere in the package; presumably it
// replaces whitespace characters that would break line-oriented output --
// confirm against its definition.
func MapperTitle(p []byte) ([][]byte, error) {
	var doc TitleDoc
	if err := json.Unmarshal(p, &doc); err != nil {
		return nil, err
	}
	trimmed := strings.TrimSpace(doc.Title)
	key := []byte(wsReplacer.Replace(trimmed))
	return [][]byte{key, p}, nil
}
// MapperTitleNormalized works like MapperTitle, but post-processes the key
// (the first field): it is trimmed, passed through wsReplacer, lowercased,
// then every match of repeatedWs is replaced by a single space and every match
// of nonWord is removed. The remaining fields are passed through untouched.
// Errors from MapperTitle are returned as is.
//
// NOTE(review): repeatedWs and nonWord are defined elsewhere in the package;
// judging by their names they match whitespace runs and non-word characters --
// confirm against their definitions.
func MapperTitleNormalized(p []byte) (fields [][]byte, err error) {
	fields, err = MapperTitle(p)
	if err != nil {
		return nil, err
	}
	title := strings.TrimSpace(string(fields[0]))
	title = strings.ToLower(wsReplacer.Replace(title))
	title = repeatedWs.ReplaceAllString(title, " ")
	fields[0] = []byte(nonWord.ReplaceAllString(title, ""))
	return fields, nil
}
// MapperTitleNysiis works like MapperTitle, but replaces the key (the first
// field) with the result of NYSIIS applied to the trimmed, wsReplacer-cleaned
// title. The remaining fields are passed through untouched. Errors from
// MapperTitle are returned as is.
//
// NOTE(review): NYSIIS is defined elsewhere in the package; presumably it
// computes the NYSIIS phonetic code -- confirm against its definition.
func MapperTitleNysiis(p []byte) (fields [][]byte, err error) {
	fields, err = MapperTitle(p)
	if err != nil {
		return nil, err
	}
	title := strings.TrimSpace(string(fields[0]))
	fields[0] = []byte(NYSIIS(wsReplacer.Replace(title)))
	return fields, nil
}
// MapperTitleSandcrawler works like MapperTitle, but replaces the key (the
// first field) with the result of sandcrawlerSlugify applied to the trimmed,
// wsReplacer-cleaned title. The remaining fields are passed through untouched.
// Errors from MapperTitle are returned as is.
//
// NOTE(review): sandcrawlerSlugify is defined elsewhere in the package; its
// exact slug rules are not visible here -- see its definition.
func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) {
	fields, err = MapperTitle(p)
	if err != nil {
		return nil, err
	}
	title := strings.TrimSpace(string(fields[0]))
	title = wsReplacer.Replace(title)
	fields[0] = []byte(sandcrawlerSlugify(title))
	return fields, nil
}
|