package skate import ( "bytes" "reflect" "runtime" "strconv" "strings" json "github.com/segmentio/encoding/json" ) var ( bTab = []byte("\b") bNewline = []byte("\n") ) type TitleDoc struct { Title string `json:"title"` } // Mapper converts a blob. type Mapper func([]byte) ([]byte, error) // FieldMapper maps a blob to an arbitrary number of fields, e.g. for (key, // doc) etc. type FieldMapper func([]byte) ([][]byte, error) // TSV serialized the result of a field mapper as TSV. func (f FieldMapper) TSV(p []byte) ([]byte, error) { fields, err := f(p) if err != nil { return nil, err } return append(bytes.Join(fields, bTab), bNewline...), nil } // NameOf returns name of value, e.g. the name of a function. func NameOf(f interface{}) string { v := reflect.ValueOf(f) if v.Kind() == reflect.Func { if rf := runtime.FuncForPC(v.Pointer()); rf != nil { return rf.Name() } } return v.String() } func Identity(p []byte) ([][]byte, error) { return [][]byte{p}, nil } func CreateFixedMapper(path string) FieldMapper { f := func(p []byte) ([][]byte, error) { var ( doc map[string]interface{} v interface{} ok bool key []byte ) if err := json.Unmarshal(p, &doc); err != nil { return nil, err } if v, ok = doc[path]; !ok { return nil, nil } switch w := v.(type) { case string: key = []byte(w) case int: key = []byte(strconv.Itoa(w)) case int64: key = []byte(strconv.Itoa(int(w))) case float64: key = []byte(strconv.FormatFloat(w, 'f', 52, 64)) default: return nil, nil } return [][]byte{key, p}, nil } return f } func MapperTitle(p []byte) ([][]byte, error) { var ( doc TitleDoc key []byte ) if err := json.Unmarshal(p, &doc); err != nil { return nil, err } else { key = []byte(wsReplacer.Replace(strings.TrimSpace(doc.Title))) } return [][]byte{key, p}, nil } func MapperTitleNormalized(p []byte) (fields [][]byte, err error) { if fields, err = MapperTitle(p); err != nil { return nil, err } key := string(fields[0]) key = wsReplacer.Replace(strings.TrimSpace(key)) key = strings.ToLower(key) key = repeatedWs.ReplaceAllString(key, " ") key = nonWord.ReplaceAllString(key, "") fields[0] = []byte(key) return fields, nil } func MapperTitleNysiis(p []byte) (fields [][]byte, err error) { if fields, err = MapperTitle(p); err != nil { return nil, err } key := string(fields[0]) key = wsReplacer.Replace(strings.TrimSpace(key)) key = NYSIIS(key) fields[0] = []byte(key) return fields, nil } func MapperTitleSandcrawler(p []byte) (fields [][]byte, err error) { if fields, err = MapperTitle(p); err != nil { return nil, err } key := string(fields[0]) key = sandcrawlerSlugify(wsReplacer.Replace(strings.TrimSpace(key))) fields[0] = []byte(key) return fields, nil }