about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-04-21 17:49:12 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-04-21 17:49:12 +0200
commit 3cb5513cb405af78a01750a29a93be28ac5d90e4 (patch)
tree 46774566984606a113a17803da81f3b14ada742f /skate
parent ae9e380225be648ced23d814cd1d08d1621976bd (diff)
download refcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.tar.gz
refcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.zip
wip: a few dot examples
Diffstat (limited to 'skate')
-rw-r--r-- skate/.gitignore 2
-rw-r--r-- skate/Makefile 2
-rw-r--r-- skate/cmd/skate-dot/main.go 80
-rw-r--r-- skate/go.mod 2
-rw-r--r-- skate/go.sum 4
-rw-r--r-- skate/wordwrap/wordwrap.go 83
-rw-r--r-- skate/wordwrap/wordwrap_test.go 99
7 files changed, 270 insertions, 2 deletions
diff --git a/skate/.gitignore b/skate/.gitignore
index 4e893a0..031329a 100644
--- a/skate/.gitignore
+++ b/skate/.gitignore
@@ -24,4 +24,4 @@
/skate-wikipedia-doi
packaging/debian/skate/usr
skate_*_amd64.deb
-
+/skate-dot
diff --git a/skate/Makefile b/skate/Makefile
index ccaf08e..fc36c8e 100644
--- a/skate/Makefile
+++ b/skate/Makefile
@@ -1,5 +1,5 @@
SHELL := /bin/bash
-TARGETS := skate-ref-to-release skate-derive-key skate-cluster skate-verify skate-to-doi skate-bref-id skate-from-unstructured skate-wikipedia-doi
+TARGETS := skate-ref-to-release skate-derive-key skate-cluster skate-verify skate-to-doi skate-bref-id skate-from-unstructured skate-wikipedia-doi skate-dot
PKGNAME := skate
.PHONY: test
diff --git a/skate/cmd/skate-dot/main.go b/skate/cmd/skate-dot/main.go
new file mode 100644
index 0000000..97b70ad
--- /dev/null
+++ b/skate/cmd/skate-dot/main.go
@@ -0,0 +1,80 @@
+// skate-dot generates dot files from inbound and outbound citation links.
+package main
+
+import (
+ "context"
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "os"
+
+ "github.com/elastic/go-elasticsearch/esapi"
+ elasticsearch "github.com/elastic/go-elasticsearch/v7"
+)
+
+var (
+ es = flag.String("es", "http://localhost:9200", "elasticsearch holding fatcat_ref index")
+ index = flag.String("x", "fatcat_ref_v01", "index name")
+ fatcat = flag.String("f", "https://api.fatcat.wiki/v0", "fatcat api")
+ ident = flag.String("i", "2kw3xjf2cbcmdlm3ihkoz2t4lu", "release ident")
+)
+
+func main() {
+ flag.Parse()
+ // s := "The Determination of Concentration and Type of Ownership on Bank Performance and Risks in Indonesia"
+ // fmt.Printf("%s\n", wordwrap.WrapString(s, 20))
+ cfg := elasticsearch.Config{
+ Addresses: []string{
+ *es,
+ },
+ }
+ es, err := elasticsearch.NewClient(cfg)
+ if err != nil {
+ log.Fatal(err)
+ }
+ client := &Client{
+ Api: *fatcat,
+ Es: es,
+ Index: *index,
+ }
+ log.Println(client)
+ client.Outbound(*ident)
+ client.Inbound(*ident)
+}
+
+// Client is a client for fatcat and elasticsearch.
+type Client struct {
+ Api string
+ Es *elasticsearch.Client
+ Index string
+}
+
+func (c *Client) String() string {
+ info, _ := c.Es.Info()
+ return fmt.Sprintf("%s %s (%s) %s", c.Api, info, elasticsearch.Version, c.Index)
+}
+
+func (c *Client) Inbound(ident string) []string {
+ resp, err := esapi.Search(
+ esapi.Search.WithContext(context.Background()),
+ esapi.Search.WithIndex(c.Index),
+ )
+ if err != nil {
+ log.Fatal(err)
+ }
+ io.Copy(os.Stdout, resp.Body)
+ return nil
+}
+
+func (c *Client) Outbound(ident string) []string {
+ req := &esapi.SearchRequest{
+ Query: fmt.Sprintf("source_release_ident:%s", ident),
+ }
+ resp, err := req.Do(context.Background(), c.Es)
+ if err != nil {
+ log.Fatal(err)
+ }
+ io.Copy(os.Stdout, resp.Body)
+ return nil
+}
diff --git a/skate/go.mod b/skate/go.mod
index e8678c0..c14fd53 100644
--- a/skate/go.mod
+++ b/skate/go.mod
@@ -5,6 +5,8 @@ go 1.15
require (
github.com/colinmarc/hdfs v1.1.3 // indirect
github.com/dgraph-io/ristretto v0.0.3
+ github.com/elastic/go-elasticsearch v0.0.0 // indirect
+ github.com/elastic/go-elasticsearch/v7 v7.12.0 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/json-iterator/go v1.1.10
github.com/kr/pretty v0.2.1 // indirect
diff --git a/skate/go.sum b/skate/go.sum
index 8690252..1d77ab9 100644
--- a/skate/go.sum
+++ b/skate/go.sum
@@ -11,6 +11,10 @@ github.com/dgraph-io/ristretto v0.0.3 h1:jh22xisGBjrEVnRZ1DVTpBVQm0Xndu8sMl0CWDz
github.com/dgraph-io/ristretto v0.0.3/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
+github.com/elastic/go-elasticsearch v0.0.0 h1:Pd5fqOuBxKxv83b0+xOAJDAkziWYwFinWnBO0y+TZaA=
+github.com/elastic/go-elasticsearch v0.0.0/go.mod h1:TkBSJBuTyFdBnrNqoPc54FN0vKf5c04IdM4zuStJ7xg=
+github.com/elastic/go-elasticsearch/v7 v7.12.0 h1:j4tvcMrZJLp39L2NYvBb7f+lHKPqPHSL3nvB8+/DV+s=
+github.com/elastic/go-elasticsearch/v7 v7.12.0/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
diff --git a/skate/wordwrap/wordwrap.go b/skate/wordwrap/wordwrap.go
new file mode 100644
index 0000000..f7bedda
--- /dev/null
+++ b/skate/wordwrap/wordwrap.go
@@ -0,0 +1,83 @@
+package wordwrap
+
+import (
+ "bytes"
+ "unicode"
+)
+
+const nbsp = 0xA0
+
+// WrapString wraps the given string within lim width in characters.
+//
+// Wrapping is currently naive and only happens at white-space. A future
+// version of the library will implement smarter wrapping. This means that
+// pathological cases can dramatically reach past the limit, such as a very
+// long word.
+func WrapString(s string, lim uint) string {
+ // Initialize a buffer with capacity len(s); inserted breaks replace whitespace, so no extra room is reserved
+ init := make([]byte, 0, len(s))
+ buf := bytes.NewBuffer(init)
+
+ var current uint
+ var wordBuf, spaceBuf bytes.Buffer
+ var wordBufLen, spaceBufLen uint
+
+ for _, char := range s {
+ if char == '\n' {
+ if wordBuf.Len() == 0 {
+ if current+spaceBufLen > lim {
+ current = 0
+ } else {
+ current += spaceBufLen
+ spaceBuf.WriteTo(buf)
+ }
+ spaceBuf.Reset()
+ spaceBufLen = 0
+ } else {
+ current += spaceBufLen + wordBufLen
+ spaceBuf.WriteTo(buf)
+ spaceBuf.Reset()
+ spaceBufLen = 0
+ wordBuf.WriteTo(buf)
+ wordBuf.Reset()
+ wordBufLen = 0
+ }
+ buf.WriteRune(char)
+ current = 0
+ } else if unicode.IsSpace(char) && char != nbsp {
+ if spaceBuf.Len() == 0 || wordBuf.Len() > 0 {
+ current += spaceBufLen + wordBufLen
+ spaceBuf.WriteTo(buf)
+ spaceBuf.Reset()
+ spaceBufLen = 0
+ wordBuf.WriteTo(buf)
+ wordBuf.Reset()
+ wordBufLen = 0
+ }
+
+ spaceBuf.WriteRune(char)
+ spaceBufLen++
+ } else {
+ wordBuf.WriteRune(char)
+ wordBufLen++
+
+ if current+wordBufLen+spaceBufLen > lim && wordBufLen < lim {
+ buf.WriteRune('\n')
+ current = 0
+ spaceBuf.Reset()
+ spaceBufLen = 0
+ }
+ }
+ }
+
+ if wordBuf.Len() == 0 {
+ if current+spaceBufLen <= lim {
+ spaceBuf.WriteTo(buf)
+ }
+ } else {
+ spaceBuf.WriteTo(buf)
+ wordBuf.WriteTo(buf)
+ }
+
+ return buf.String()
+}
diff --git a/skate/wordwrap/wordwrap_test.go b/skate/wordwrap/wordwrap_test.go
new file mode 100644
index 0000000..98010eb
--- /dev/null
+++ b/skate/wordwrap/wordwrap_test.go
@@ -0,0 +1,99 @@
+package wordwrap
+
+import (
+ "strings"
+ "testing"
+)
+
+func TestWrapString(t *testing.T) {
+ cases := []struct {
+ Input, Output string
+ Lim uint
+ }{
+ // A simple word passes through.
+ {
+ "foo",
+ "foo",
+ 4,
+ },
+ // A single word that is too long passes through.
+ // We do not break words.
+ {
+ "foobarbaz",
+ "foobarbaz",
+ 4,
+ },
+ // Lines are broken at whitespace.
+ {
+ "foo bar baz",
+ "foo\nbar\nbaz",
+ 4,
+ },
+ // Lines are broken at whitespace, even if words
+ // are too long. We do not break words.
+ {
+ "foo bars bazzes",
+ "foo\nbars\nbazzes",
+ 4,
+ },
+ // A word that would run beyond the width is wrapped.
+ {
+ "fo sop",
+ "fo\nsop",
+ 4,
+ },
+ // Do not break on non-breaking space.
+ {
+ "foo bar\u00A0baz",
+ "foo\nbar\u00A0baz",
+ 10,
+ },
+ // Whitespace that trails a line and fits the width
+ // passes through, as does whitespace prefixing an
+ // explicit line break. A tab counts as one character.
+ {
+ "foo\nb\t r\n baz",
+ "foo\nb\t r\n baz",
+ 4,
+ },
+ // Trailing whitespace is removed if it doesn't fit the width.
+ // Runs of whitespace on which a line is broken are removed.
+ {
+ "foo \nb ar ",
+ "foo\nb\nar",
+ 4,
+ },
+ // An explicit line break at the end of the input is preserved.
+ {
+ "foo bar baz\n",
+ "foo\nbar\nbaz\n",
+ 4,
+ },
+ // Explicit breaks are always preserved.
+ {
+ "\nfoo bar\n\n\nbaz\n",
+ "\nfoo\nbar\n\n\nbaz\n",
+ 4,
+ },
+ // Complete example:
+ {
+ " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz \nBAM ",
+ " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* baz\nBAM",
+ 6,
+ },
+ // Multi-byte characters
+ {
+ strings.Repeat("\u2584 ", 4),
+ "\u2584 \u2584" + "\n" +
+ strings.Repeat("\u2584 ", 2),
+ 4,
+ },
+ }
+
+ for i, tc := range cases {
+ actual := WrapString(tc.Input, tc.Lim)
+ if actual != tc.Output {
+ t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`", i, tc.Input, tc.Output, actual)
+ }
+ }
+}