// skate-conv converts various schemas into releases. This should replace the // very specific skate-ref-to-release and the like. // // $ skate-conv -f ref < FILE > FILE // // Currently source schemas: "ref", "ol", "rg" package main import ( "flag" "log" "os" "runtime" "strings" "git.archive.org/martin/cgraph/skate" "git.archive.org/martin/cgraph/skate/parallel" json "github.com/segmentio/encoding/json" ) var ( numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") batchSize = flag.Int("b", 100000, "batch size") fromFormat = flag.String("f", "ref", "import schema") bytesNewline = []byte("\n") f func([]byte) ([]byte, error) ) func main() { flag.Parse() switch *fromFormat { case "ref": f = refToRelease case "rg": f = rgSitemapToRelease case "ol": f = openLibraryToRelease } pp := parallel.NewProcessor(os.Stdin, os.Stdout, f) pp.NumWorkers = *numWorkers pp.BatchSize = *batchSize if err := pp.Run(); err != nil { log.Fatal(err) } } // refToRelease converts a ref document to a release. func refToRelease(p []byte) ([]byte, error) { var ref skate.Ref if err := json.Unmarshal(p, &ref); err != nil { return nil, err } release, err := skate.RefToRelease(&ref) if err != nil { return nil, err } release.Extra.Skate.Status = "ref" // means: converted from ref release.Extra.Skate.Ref.Index = ref.Index release.Extra.Skate.Ref.Key = ref.Key b, err := json.Marshal(release) b = append(b, bytesNewline...) return b, err } func rgSitemapToRelease(p []byte) ([]byte, error) { var ( s skate.Sitemap release skate.Release ) if err := json.Unmarshal(p, &s); err != nil { return nil, err } release.Title = s.Title if len(s.URL) > 41 { // XXX: A pseudo ident, maybe irritating. release.Ident = strings.Split(s.URL[41:], "_")[0] } release.Extra.Skate.Status = "rg" release.Extra.Skate.ResearchGate.URL = s.URL b, err := json.Marshal(release) b = append(b, bytesNewline...) return b, err } func openLibraryToRelease(p []byte) ([]byte, error) { var w skate.OpenLibraryWork if err := json.Unmarshal(p, &w); err != nil { return nil, err } release, err := skate.OpenLibraryToRelease(&w) if err != nil { return nil, err } release.Extra.Skate.Status = "ol" b, err := json.Marshal(release) b = append(b, bytesNewline...) return b, err }