1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
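// magrefs turns MAG references into a DOI-to-DOI version. The input (e.g.
// PaperReferences.txt) is a two-column, tab-separated file read from stdin:
// PaperId, PaperReferenceId. Both ids are resolved through a mapping database: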
//
// sqlite> .schema
// CREATE TABLE map
// (
// k TEXT,
// v TEXT
// );
// CREATE INDEX idx_k ON map(k);
// CREATE INDEX idx_v ON map(v);
// sqlite> select * from map limit 10;
// 2257060365|10.3233/978-1-58603-957-8-354
// 14558443|10.1007/978-3-662-45174-8_28
// 15354235|10.1007/978-3-662-44777-2_60
//
package main
import (
"database/sql"
"flag"
"fmt"
"log"
"os"
"strings"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
"github.com/miku/parallel"
)
// mappingDatabase holds the value of the -m flag: the path of the SQLite
// database whose map table translates MAG ids (column k) into DOIs (column v).
var mappingDatabase = flag.String(
	"m",
	"",
	"mapping database (k=mag_id, v=doi)",
)
// Map is a single row of the mapping table "map"; sqlx fills it via the db
// struct tags.
type Map struct {
// Key is read from column k (the MAG id, per the -m flag description).
Key string `db:"k"`
// Value is read from column v (the DOI, per the -m flag description).
Value string `db:"v"`
}
// main parses the command line, insists on the -m mapping database and hands
// the actual work to run. Any error ends the program via log.Fatal.
func main() {
	flag.Parse()
	if *mappingDatabase == "" {
		log.Fatal("mapping database required")
	}
	if err := run(*mappingDatabase); err != nil {
		log.Fatal(err)
	}
}

// run reads tab-separated lines from stdin, resolves the first two columns
// through the map table of the SQLite database at path and writes one
// "value<TAB>value" line to stdout for every input line where both ids are
// found. Lines with fewer than two columns, or with an id missing from the
// table, are dropped silently. The first database error aborts processing.
//
// Keeping the work in a function of its own (rather than in main) lets the
// deferred Close calls run before main calls log.Fatal, which exits without
// running defers.
func run(path string) error {
	db, err := sqlx.Open("sqlite3", fmt.Sprintf("file:%s?mode=ro", path))
	if err != nil {
		return err
	}
	defer db.Close()

	// Columns are listed explicitly so that additional columns in the table
	// cannot break scanning into Map.
	const query = "SELECT k, v FROM map WHERE k = ?"

	// Prepare the lookup once instead of handing the SQL text to the driver
	// twice per input line. Preparing also touches the database, so a wrong
	// path or a missing table is reported before any input is consumed.
	stmt, err := db.Preparex(query)
	if err != nil {
		return fmt.Errorf("preparing lookup on %q: %w", path, err)
	}
	defer stmt.Close()

	// resolve returns the v column for id; the boolean is false when the id
	// has no row in the table, which is not treated as an error.
	resolve := func(id string) (string, bool, error) {
		var row Map
		// The comparison is applied to the unwrapped result of Get.
		switch err := stmt.Get(&row, id); {
		case err == nil:
			return row.Value, true, nil
		case err == sql.ErrNoRows:
			return "", false, nil
		default:
			return "", false, fmt.Errorf("looking up %q: %w", id, err)
		}
	}

	pp := parallel.NewProcessor(os.Stdin, os.Stdout, func(p []byte) ([]byte, error) {
		fields := strings.Split(string(p), "\t")
		if len(fields) < 2 {
			return nil, nil
		}
		source, ok, err := resolve(strings.TrimSpace(fields[0]))
		if err != nil || !ok {
			// err is nil when the id is simply unknown: skip the line.
			return nil, err
		}
		target, ok, err := resolve(strings.TrimSpace(fields[1]))
		if err != nil || !ok {
			return nil, err
		}
		return []byte(source + "\t" + target + "\n"), nil
	})
	return pp.Run()
}
|