// magrefs turns MAG references into a doi-to-doi version // PaperReferences.txt is a two column file: PaperId, PaperReferenceId. // // sqlite> .schema // CREATE TABLE map // ( // k TEXT, // v TEXT // ); // CREATE INDEX idx_k ON map(k); // CREATE INDEX idx_v ON map(v); // sqlite> select * from map limit 10; // 2257060365|10.3233/978-1-58603-957-8-354 // 14558443|10.1007/978-3-662-45174-8_28 // 15354235|10.1007/978-3-662-44777-2_60 // package main import ( "database/sql" "flag" "fmt" "log" "os" "strings" "github.com/jmoiron/sqlx" _ "github.com/mattn/go-sqlite3" "github.com/miku/parallel" ) var mappingDatabase = flag.String("m", "", "mapping database (k=mag_id, v=doi)") type Map struct { Key string `db:"k"` Value string `db:"v"` } func main() { flag.Parse() if *mappingDatabase == "" { log.Fatal("mapping database required") } db, err := sqlx.Open("sqlite3", fmt.Sprintf("file:%s?mode=ro", *mappingDatabase)) if err != nil { log.Fatal(err) } defer db.Close() pp := parallel.NewProcessor(os.Stdin, os.Stdout, func(p []byte) ([]byte, error) { fields := strings.Split(string(p), "\t") if len(fields) < 2 { return nil, nil } var ( ms, mt Map source = strings.TrimSpace(fields[0]) target = strings.TrimSpace(fields[1]) ) if err := db.Get(&ms, "SELECT * FROM map where k = ?", source); err != nil { if err == sql.ErrNoRows { return nil, nil } return nil, err } if err := db.Get(&mt, "SELECT * FROM map where k = ?", target); err != nil { if err == sql.ErrNoRows { return nil, nil } return nil, err } line := fmt.Sprintf("%s\t%s\n", ms.Value, mt.Value) return []byte(line), nil }) if err := pp.Run(); err != nil { log.Fatal(err) } }