diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-03-21 01:17:38 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-03-21 01:17:38 +0100 |
commit | 09a7e8c9d013f13a1aa1ef4e9b7f397647b79967 (patch) | |
tree | 122b474e27afbc66cba1182e983ef5c8555ed12f /skate/cmd/skate-ref-to-release | |
parent | a7e0cf191ebf8fb499e0ab9a3b6cae45727f1286 (diff) | |
download | refcat-09a7e8c9d013f13a1aa1ef4e9b7f397647b79967.tar.gz refcat-09a7e8c9d013f13a1aa1ef4e9b7f397647b79967.zip |
initial import of skate
Diffstat (limited to 'skate/cmd/skate-ref-to-release')
-rw-r--r-- | skate/cmd/skate-ref-to-release/main.go | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/skate/cmd/skate-ref-to-release/main.go b/skate/cmd/skate-ref-to-release/main.go new file mode 100644 index 0000000..0eec40b --- /dev/null +++ b/skate/cmd/skate-ref-to-release/main.go @@ -0,0 +1,82 @@ +// skate-ref-to-release converts a "ref" document to a "release" document. +// +package main + +import ( + "flag" + "log" + "os" + "runtime" + "strings" + + "github.com/miku/parallel" + "git.archive.org/martin/cgraph/skate" + + jsoniter "github.com/json-iterator/go" +) + +var ( + numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") + batchSize = flag.Int("b", 100000, "batch size") + fromFormat = flag.String("f", "ref", "import data shape") + + json = jsoniter.ConfigCompatibleWithStandardLibrary + bytesNewline = []byte("\n") +) + +func refToRelease(p []byte) ([]byte, error) { + var ref skate.Ref + if err := json.Unmarshal(p, &ref); err != nil { + return nil, err + } + release, err := skate.RefToRelease(&ref) + if err != nil { + return nil, err + } + release.Extra.Skate.Status = "ref" // means: converted from ref + release.Extra.Skate.Ref.Index = ref.Index + release.Extra.Skate.Ref.Key = ref.Key + b, err := json.Marshal(release) + b = append(b, bytesNewline...) + return b, err +} + +func rgSitemapToRelease(p []byte) ([]byte, error) { + var ( + s skate.Sitemap + release skate.Release + ) + if err := json.Unmarshal(p, &s); err != nil { + return nil, err + } + release.Title = s.Title + if len(s.URL) > 41 { + // XXX: A pseudo ident, maybe irritating. + release.Ident = strings.Split(s.URL[41:], "_")[0] + } + release.Extra.Skate.Status = "rg" + release.Extra.Skate.ResearchGate.URL = s.URL + b, err := json.Marshal(release) + b = append(b, bytesNewline...) + return b, err +} + +func main() { + flag.Parse() + switch *fromFormat { + case "ref": + pp := parallel.NewProcessor(os.Stdin, os.Stdout, refToRelease) + pp.NumWorkers = *numWorkers + pp.BatchSize = *batchSize + if err := pp.Run(); err != nil { + log.Fatal(err) + } + case "rg": + pp := parallel.NewProcessor(os.Stdin, os.Stdout, rgSitemapToRelease) + pp.NumWorkers = *numWorkers + pp.BatchSize = *batchSize + if err := pp.Run(); err != nil { + log.Fatal(err) + } + } +} |