From 3cb5513cb405af78a01750a29a93be28ac5d90e4 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 21 Apr 2021 17:49:12 +0200 Subject: wip: a few dot examples --- skate/.gitignore | 2 +- skate/Makefile | 2 +- skate/cmd/skate-dot/main.go | 80 +++++++++++++++++++++++++++++++++ skate/go.mod | 2 + skate/go.sum | 4 ++ skate/wordwrap/wordwrap.go | 83 ++++++++++++++++++++++++++++++++++ skate/wordwrap/wordwrap_test.go | 99 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 skate/cmd/skate-dot/main.go create mode 100644 skate/wordwrap/wordwrap.go create mode 100644 skate/wordwrap/wordwrap_test.go (limited to 'skate') diff --git a/skate/.gitignore b/skate/.gitignore index 4e893a0..031329a 100644 --- a/skate/.gitignore +++ b/skate/.gitignore @@ -24,4 +24,4 @@ /skate-wikipedia-doi packaging/debian/skate/usr skate_*_amd64.deb - +/skate-dot diff --git a/skate/Makefile b/skate/Makefile index ccaf08e..fc36c8e 100644 --- a/skate/Makefile +++ b/skate/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -TARGETS := skate-ref-to-release skate-derive-key skate-cluster skate-verify skate-to-doi skate-bref-id skate-from-unstructured skate-wikipedia-doi +TARGETS := skate-ref-to-release skate-derive-key skate-cluster skate-verify skate-to-doi skate-bref-id skate-from-unstructured skate-wikipedia-doi skate-dot PKGNAME := skate .PHONY: test diff --git a/skate/cmd/skate-dot/main.go b/skate/cmd/skate-dot/main.go new file mode 100644 index 0000000..97b70ad --- /dev/null +++ b/skate/cmd/skate-dot/main.go @@ -0,0 +1,80 @@ +// skate-dot generates dot files from inbound and outbound citation links. +package main + +import ( + "context" + "flag" + "fmt" + "io" + "log" + "os" + + "github.com/elastic/go-elasticsearch/esapi" + elasticsearch "github.com/elastic/go-elasticsearch/v7" +) + +var ( + es = flag.String("es", "http://localhost:9200", "elasticsearch holding fatcat_ref index") + index = flag.String("x", "fatcat_ref_v01", "index name") + fatcat = flag.String("f", "https://api.fatcat.wiki/v0", "fatcat api") + ident = flag.String("i", "2kw3xjf2cbcmdlm3ihkoz2t4lu", "release ident") +) + +func main() { + flag.Parse() + // s := "The Determination of Concentration and Type of Ownership on Bank Performance and Risks in Indonesia" + // fmt.Printf("%s\n", wordwrap.WrapString(s, 20)) + cfg := elasticsearch.Config{ + Addresses: []string{ + *es, + }, + } + es, err := elasticsearch.NewClient(cfg) + if err != nil { + log.Fatal(err) + } + client := &Client{ + Api: *fatcat, + Es: es, + Index: *index, + } + log.Println(client) + client.Outbound(*ident) + client.Inbound(*ident) +} + +// A client for fatcat and elasticsearch. +type Client struct { + Api string + Es *elasticsearch.Client + Index string +} + +func (c *Client) String() string { + info, _ := c.Es.Info() + return fmt.Sprintf("%s %s (%s) %s", c.Api, info, elasticsearch.Version, c.Index) +} + +func (c *Client) Inbound(ident string) []string { + resp, err := esapi.Search( + esapi.Search.WithContext(context.Background()), + esapi.Search.WithIndex(c.Index), + ) + if err != nil { + log.Fatal(err) + } + io.Copy(os.Stdout, resp.Body) + return nil +} + +func (c *Client) Outbound(ident string) []string { + req := &esapi.SearchRequest{ + Query: fmt.Sprintf("source_release_ident:%s", ident), + } + resp, err := req.Do(context.Background(), c.Es) + if err != nil { + log.Fatal(err) + } + io.Copy(os.Stdout, resp.Body) + return nil +} diff --git a/skate/go.mod b/skate/go.mod index e8678c0..c14fd53 100644 --- a/skate/go.mod +++ b/skate/go.mod @@ -5,6 +5,8 @@ go 1.15 require ( github.com/colinmarc/hdfs v1.1.3 // indirect github.com/dgraph-io/ristretto v0.0.3 + github.com/elastic/go-elasticsearch v0.0.0 // indirect + github.com/elastic/go-elasticsearch/v7 v7.12.0 // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/json-iterator/go v1.1.10 github.com/kr/pretty v0.2.1 // indirect diff --git a/skate/go.sum b/skate/go.sum index 8690252..1d77ab9 100644 --- a/skate/go.sum +++ b/skate/go.sum @@ -11,6 +11,10 @@ github.com/dgraph-io/ristretto v0.0.3 h1:jh22xisGBjrEVnRZ1DVTpBVQm0Xndu8sMl0CWDz github.com/dgraph-io/ristretto v0.0.3/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/elastic/go-elasticsearch v0.0.0 h1:Pd5fqOuBxKxv83b0+xOAJDAkziWYwFinWnBO0y+TZaA= +github.com/elastic/go-elasticsearch v0.0.0/go.mod h1:TkBSJBuTyFdBnrNqoPc54FN0vKf5c04IdM4zuStJ7xg= +github.com/elastic/go-elasticsearch/v7 v7.12.0 h1:j4tvcMrZJLp39L2NYvBb7f+lHKPqPHSL3nvB8+/DV+s= +github.com/elastic/go-elasticsearch/v7 v7.12.0/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= diff --git a/skate/wordwrap/wordwrap.go b/skate/wordwrap/wordwrap.go new file mode 100644 index 0000000..f7bedda --- /dev/null +++ b/skate/wordwrap/wordwrap.go @@ -0,0 +1,83 @@ +package wordwrap + +import ( + "bytes" + "unicode" +) + +const nbsp = 0xA0 + +// WrapString wraps the given string within lim width in characters. +// +// Wrapping is currently naive and only happens at white-space. A future +// version of the library will implement smarter wrapping. This means that +// pathological cases can dramatically reach past the limit, such as a very +// long word. +func WrapString(s string, lim uint) string { + // Initialize a buffer with a slightly larger size to account for breaks + init := make([]byte, 0, len(s)) + buf := bytes.NewBuffer(init) + + var current uint + var wordBuf, spaceBuf bytes.Buffer + var wordBufLen, spaceBufLen uint + + for _, char := range s { + if char == '\n' { + if wordBuf.Len() == 0 { + if current+spaceBufLen > lim { + current = 0 + } else { + current += spaceBufLen + spaceBuf.WriteTo(buf) + } + spaceBuf.Reset() + spaceBufLen = 0 + } else { + current += spaceBufLen + wordBufLen + spaceBuf.WriteTo(buf) + spaceBuf.Reset() + spaceBufLen = 0 + wordBuf.WriteTo(buf) + wordBuf.Reset() + wordBufLen = 0 + } + buf.WriteRune(char) + current = 0 + } else if unicode.IsSpace(char) && char != nbsp { + if spaceBuf.Len() == 0 || wordBuf.Len() > 0 { + current += spaceBufLen + wordBufLen + spaceBuf.WriteTo(buf) + spaceBuf.Reset() + spaceBufLen = 0 + wordBuf.WriteTo(buf) + wordBuf.Reset() + wordBufLen = 0 + } + + spaceBuf.WriteRune(char) + spaceBufLen++ + } else { + wordBuf.WriteRune(char) + wordBufLen++ + + if current+wordBufLen+spaceBufLen > lim && wordBufLen < lim { + buf.WriteRune('\n') + current = 0 + spaceBuf.Reset() + spaceBufLen = 0 + } + } + } + + if wordBuf.Len() == 0 { + if current+spaceBufLen <= lim { + spaceBuf.WriteTo(buf) + } + } else { + spaceBuf.WriteTo(buf) + wordBuf.WriteTo(buf) + } + + return buf.String() +} diff --git a/skate/wordwrap/wordwrap_test.go b/skate/wordwrap/wordwrap_test.go new file mode 100644 index 0000000..98010eb --- /dev/null +++ b/skate/wordwrap/wordwrap_test.go @@ -0,0 +1,99 @@ +package wordwrap + +import ( + "strings" + "testing" +) + +func TestWrapString(t *testing.T) { + cases := []struct { + Input, Output string + Lim uint + }{ + // A simple word passes through. + { + "foo", + "foo", + 4, + }, + // A single word that is too long passes through. + // We do not break words. + { + "foobarbaz", + "foobarbaz", + 4, + }, + // Lines are broken at whitespace. + { + "foo bar baz", + "foo\nbar\nbaz", + 4, + }, + // Lines are broken at whitespace, even if words + // are too long. We do not break words. + { + "foo bars bazzes", + "foo\nbars\nbazzes", + 4, + }, + // A word that would run beyond the width is wrapped. + { + "fo sop", + "fo\nsop", + 4, + }, + // Do not break on non-breaking space. + { + "foo bar\u00A0baz", + "foo\nbar\u00A0baz", + 10, + }, + // Whitespace that trails a line and fits the width + // passes through, as does whitespace prefixing an + // explicit line break. A tab counts as one character. + { + "foo\nb\t r\n baz", + "foo\nb\t r\n baz", + 4, + }, + // Trailing whitespace is removed if it doesn't fit the width. + // Runs of whitespace on which a line is broken are removed. + { + "foo \nb ar ", + "foo\nb\nar", + 4, + }, + // An explicit line break at the end of the input is preserved. + { + "foo bar baz\n", + "foo\nbar\nbaz\n", + 4, + }, + // Explicit break are always preserved. + { + "\nfoo bar\n\n\nbaz\n", + "\nfoo\nbar\n\n\nbaz\n", + 4, + }, + // Complete example: + { + " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz \nBAM ", + " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* baz\nBAM", + 6, + }, + // Multi-byte characters + { + strings.Repeat("\u2584 ", 4), + "\u2584 \u2584" + "\n" + + strings.Repeat("\u2584 ", 2), + 4, + }, + } + + for i, tc := range cases { + actual := WrapString(tc.Input, tc.Lim) + if actual != tc.Output { + t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`", i, tc.Input, tc.Output, actual) + } + } +} -- cgit v1.2.3