aboutsummaryrefslogtreecommitdiffstats
path: root/extra/mag/magrefs.go
blob: bf1781b5a065703704ae4ac4009729cd4ac3b57a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
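// magrefs turns MAG references into a DOI-to-DOI version. The input,
// PaperReferences.txt, is a two-column, tab-separated file (PaperId,
// PaperReferenceId) read from stdin; each id is replaced by its DOI and the
// resulting pair is written to stdout. Lines for which either id has no DOI
// in the mapping are dropped.
//
// The id-to-DOI mapping is read from an sqlite database (flag -m), e.g.:
//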
// sqlite> .schema
// CREATE TABLE map
// (
//         k TEXT,
//         v TEXT
// );
// CREATE INDEX idx_k ON map(k);
// CREATE INDEX idx_v ON map(v);
// sqlite> select * from map limit 10;
// 2257060365|10.3233/978-1-58603-957-8-354
// 14558443|10.1007/978-3-662-45174-8_28
// 15354235|10.1007/978-3-662-44777-2_60
//
package main

import (
	"database/sql"
	"errors"
	"flag"
	"fmt"
	"log"
	"os"
	"strings"

	"github.com/jmoiron/sqlx"
	_ "github.com/mattn/go-sqlite3"
	"github.com/miku/parallel"
)

// mappingDatabase is the -m flag: the path of the sqlite mapping database,
// which the program opens read-only. It defaults to the empty string, which
// the program rejects at startup.
var mappingDatabase = flag.String("m", "", "mapping database (k=mag_id, v=doi)")

// Map is one row of the "map" table in the mapping database; the db tags
// bind the fields to the table's k and v columns.
type Map struct {
	Key   string `db:"k"` // lookup key; a MAG id according to the -m flag help
	Value string `db:"v"` // mapped value; a DOI according to the -m flag help
}

func main() {
	flag.Parse()
	if *mappingDatabase == "" {
		log.Fatal("mapping database required")
	}
	db, err := sqlx.Open("sqlite3", fmt.Sprintf("file:%s?mode=ro", *mappingDatabase))
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	pp := parallel.NewProcessor(os.Stdin, os.Stdout, func(p []byte) ([]byte, error) {
		fields := strings.Split(string(p), "\t")
		if len(fields) < 2 {
			return nil, nil
		}
		var (
			ms, mt Map
			source = strings.TrimSpace(fields[0])
			target = strings.TrimSpace(fields[1])
		)
		if err := db.Get(&ms, "SELECT * FROM map where k = ?", source); err != nil {
			if err == sql.ErrNoRows {
				return nil, nil
			}
			return nil, err
		}
		if err := db.Get(&mt, "SELECT * FROM map where k = ?", target); err != nil {
			if err == sql.ErrNoRows {
				return nil, nil
			}
			return nil, err
		}
		line := fmt.Sprintf("%s\t%s\n", ms.Value, mt.Value)
		return []byte(line), nil
	})
	if err := pp.Run(); err != nil {
		log.Fatal(err)
	}
}