From d60dff7db926cc40a288584ac3f9970bb85c30c0 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 31 Mar 2021 23:54:28 +0200 Subject: rename to: skate-wikipedia-doi --- python/refcat/tasks.py | 2 +- skate/Makefile | 2 +- skate/README.md | 2 -- skate/cmd/skate-biblioref-from-wikipedia/main.go | 43 ------------------------ skate/cmd/skate-wikipedia-doi/main.go | 43 ++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 47 deletions(-) delete mode 100644 skate/cmd/skate-biblioref-from-wikipedia/main.go create mode 100644 skate/cmd/skate-wikipedia-doi/main.go diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index bceff2a..8459608 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -1352,7 +1352,7 @@ class BiblioRefWikiDOISortedKeys(Refcat): def run(self): output = shellout(""" cat {input} | - skate-biblioref-from-wikipedia | + skate-wikipedia-doi | LC_ALL=C sort -S 10% -k2,2 | zstd -T0 -c > {output} """, input=self.input().path) diff --git a/skate/Makefile b/skate/Makefile index e4c584d..ccaf08e 100644 --- a/skate/Makefile +++ b/skate/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -TARGETS := skate-ref-to-release skate-derive-key skate-cluster skate-verify skate-to-doi skate-bref-id skate-from-unstructured skate-biblioref-from-wikipedia +TARGETS := skate-ref-to-release skate-derive-key skate-cluster skate-verify skate-to-doi skate-bref-id skate-from-unstructured skate-wikipedia-doi PKGNAME := skate .PHONY: test diff --git a/skate/README.md b/skate/README.md index 6c39717..bd66c3d 100644 --- a/skate/README.md +++ b/skate/README.md @@ -5,11 +5,9 @@ citation graph pipeline. ## Tools -### skate-biblioref ### skate-biblioref-from-wikipedia ### skate-bref-id ### skate-cluster -### skate-cluster-stats ### skate-derive-key ### skate-from-unstructured ### skate-ref-to-release diff --git a/skate/cmd/skate-biblioref-from-wikipedia/main.go b/skate/cmd/skate-biblioref-from-wikipedia/main.go deleted file mode 100644 index 15b0d0a..0000000 --- a/skate/cmd/skate-biblioref-from-wikipedia/main.go +++ /dev/null @@ -1,43 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "os" - "runtime" - "strings" - - "git.archive.org/martin/cgraph/skate" - "git.archive.org/martin/cgraph/skate/parallel" - jsoniter "github.com/json-iterator/go" -) - -var ( - numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") - batchSize = flag.Int("b", 100000, "batch size") - json = jsoniter.ConfigCompatibleWithStandardLibrary - bytesNewline = []byte("\n") -) - -func main() { - flag.Parse() - pp := parallel.NewProcessor(os.Stdin, os.Stdout, func(p []byte) ([]byte, error) { - var w skate.MinimalCitations - if err := json.Unmarshal(p, &w); err != nil { - return nil, err - } - idl := w.ParseIDList() - if idl.DOI == "" { - return nil, nil - } - s := fmt.Sprintf("%s\t%s\t%s", strings.TrimSpace(w.PageTitle), idl.DOI, string(p)) - return []byte(s), nil - - }) - pp.NumWorkers = *numWorkers - pp.BatchSize = *batchSize - if err := pp.Run(); err != nil { - log.Fatal(err) - } -} diff --git a/skate/cmd/skate-wikipedia-doi/main.go b/skate/cmd/skate-wikipedia-doi/main.go new file mode 100644 index 0000000..15b0d0a --- /dev/null +++ b/skate/cmd/skate-wikipedia-doi/main.go @@ -0,0 +1,43 @@ +package main + +import ( + "flag" + "fmt" + "log" + "os" + "runtime" + "strings" + + "git.archive.org/martin/cgraph/skate" + "git.archive.org/martin/cgraph/skate/parallel" + jsoniter "github.com/json-iterator/go" +) + +var ( + numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") + batchSize = flag.Int("b", 100000, "batch size") + json = jsoniter.ConfigCompatibleWithStandardLibrary + bytesNewline = []byte("\n") +) + +func main() { + flag.Parse() + pp := parallel.NewProcessor(os.Stdin, os.Stdout, func(p []byte) ([]byte, error) { + var w skate.MinimalCitations + if err := json.Unmarshal(p, &w); err != nil { + return nil, err + } + idl := w.ParseIDList() + if idl.DOI == "" { + return nil, nil + } + s := fmt.Sprintf("%s\t%s\t%s", strings.TrimSpace(w.PageTitle), idl.DOI, string(p)) + return []byte(s), nil + + }) + pp.NumWorkers = *numWorkers + pp.BatchSize = *batchSize + if err := pp.Run(); err != nil { + log.Fatal(err) + } +} -- cgit v1.2.3