aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-05-20 14:31:04 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-05-20 14:31:04 +0200
commit8714537ecffa0641516bc01b8cdc4cdd9a9d975c (patch)
tree9e7375daf4e677c00db35a23b3ca933609b6d9ea
parent7be09009b42d3af96ca8875c698922710d92d074 (diff)
downloadrefcat-8714537ecffa0641516bc01b8cdc4cdd9a9d975c.tar.gz
refcat-8714537ecffa0641516bc01b8cdc4cdd9a9d975c.zip
wip: cdx lookup
-rw-r--r--skate/cdx.go73
-rw-r--r--skate/cmd/skate-cdx-lookup/main.go10
-rw-r--r--skate/go.mod4
-rw-r--r--skate/go.sum7
4 files changed, 84 insertions, 10 deletions
diff --git a/skate/cdx.go b/skate/cdx.go
index f98b781..ed394ed 100644
--- a/skate/cdx.go
+++ b/skate/cdx.go
@@ -5,15 +5,17 @@ import (
"io/ioutil"
"log"
"net/http"
+ "sort"
"strings"
+ "time"
"github.com/sethgrid/pester"
)
const cdxApi = "http://web.archive.org/cdx/search/cdx"
-// CDX line, might add more fields later.
-type CDX struct {
+// CDXLine is a single line of a CDX response; more fields might be added later.
+type CDXLine struct {
Surt string
Date string
Link string
@@ -23,13 +25,74 @@ type CDX struct {
Size string
}
+// CDX is a list of CDX lines, as returned by LookupCDX and ParseCDX.
+type CDX []CDXLine
+
+// ByDate implements sort.Interface for a CDX, ordering lines by their Date
+// field (compared as plain strings) in ascending order.
+type ByDate CDX
+
+// Len returns the number of lines.
+func (b ByDate) Len() int { return len(b) }
+
+// Swap exchanges the lines at positions i and j.
+func (b ByDate) Swap(i, j int) {
+	b[j], b[i] = b[i], b[j]
+}
+
+// Less reports whether the Date of line i sorts before the Date of line j.
+func (b ByDate) Less(i, j int) bool {
+	return b[j].Date > b[i].Date
+}
+
+// Summary returns a one-line, human-readable digest of the lines in c.
+//
+// The result carries the greatest Date found ("last"), the greatest Date of a
+// line with status code "200" ("ok") and the distance between the two rounded
+// to whole days ("delta"). Values that cannot be determined are reported as
+// "NA". A delta of more than 365 days is wrapped in ANSI escape sequences
+// (red, bold, underlined) so it stands out on a terminal. For an empty c only
+// the "last" and "ok" fields are emitted.
+//
+// Dates are compared as plain strings; this presumably relies on the
+// fixed-width timestamp format (e.g. 20140304124333) -- TODO confirm.
+//
+// The receiver is left untouched: sorting happens on a private copy, because
+// sort.Sort reorders in place and c shares its backing array with the caller.
+func (c CDX) Summary() string {
+	const notAvailable = "NA"
+	if len(c) == 0 {
+		return fmt.Sprintf("last=%s ok=%s", notAvailable, notAvailable)
+	}
+	// Greatest date first, on a copy so the caller's ordering is preserved.
+	sorted := make(CDX, len(c))
+	copy(sorted, c)
+	sort.Sort(sort.Reverse(ByDate(sorted)))
+	var (
+		dateLast   = sorted[0].Date
+		dateLastOK = notAvailable
+		delta      = notAvailable
+	)
+	for _, line := range sorted {
+		if line.StatusCode == "200" {
+			dateLastOK = line.Date
+			break
+		}
+	}
+	// tsDiff fails when no line with status 200 was found (dateLastOK is
+	// still "NA") or when a date does not parse; delta stays "NA" then.
+	if d, err := tsDiff(dateLast, dateLastOK); err == nil {
+		days := d.Hours() / 24
+		if days > 365 {
+			delta = fmt.Sprintf("\033[31;1;4m%0.0f\033[0m", days)
+		} else {
+			delta = fmt.Sprintf("%0.0f", days)
+		}
+	}
+	return fmt.Sprintf("last=%s ok=%s delta=%v", dateLast, dateLastOK, delta)
+}
+
+// tsDiff returns the absolute duration between two timestamps given in the
+// 14-digit form 20140304124333 (time layout "20060102150405"). The order of a
+// and b is irrelevant. If either value does not parse, a zero duration and the
+// parse error are returned; a is parsed before b.
+func tsDiff(a, b string) (time.Duration, error) {
+	const layout = "20060102150405"
+	var parsed [2]time.Time
+	for i, s := range [2]string{a, b} {
+		t, err := time.Parse(layout, s)
+		if err != nil {
+			return 0, err
+		}
+		parsed[i] = t
+	}
+	earlier, later := parsed[0], parsed[1]
+	if later.Before(earlier) {
+		earlier, later = later, earlier
+	}
+	return later.Sub(earlier), nil
+}
+
// LookupCDX asks CDX API. Result will be like:
// net,ijmse)/uploadfile/2016/1214/20161214052559646.pdf 20170516210333
// http://www.ijmse.net:80/uploadfile/2016/1214/20161214052559646.pdf
// application/pdf 200 PBPHE2OILTB43TAOUO33GBWLE2SS4LQX 2079755
//
// Also returns the raw response body.
-func LookupCDX(link string) (result []CDX, b []byte, err error) {
+func LookupCDX(link string) (result CDX, b []byte, err error) {
link = prependSchema(link)
cdxlink := fmt.Sprintf("%s?url=%s", cdxApi, link)
log.Printf("[lookup] %s", cdxlink)
@@ -50,7 +113,7 @@ func LookupCDX(link string) (result []CDX, b []byte, err error) {
return result, b, err
}
-func ParseCDX(b []byte) (result []CDX, err error) {
+func ParseCDX(b []byte) (result CDX, err error) {
for _, line := range strings.Split(string(b), "\n") {
var fields = strings.Fields(line)
if len(fields) == 0 {
@@ -60,7 +123,7 @@ func ParseCDX(b []byte) (result []CDX, err error) {
log.Printf("short line: %s", line)
continue
}
- cdx := CDX{
+ cdx := CDXLine{
Surt: fields[0],
Date: fields[1],
Link: fields[2],
diff --git a/skate/cmd/skate-cdx-lookup/main.go b/skate/cmd/skate-cdx-lookup/main.go
index e26102f..b480078 100644
--- a/skate/cmd/skate-cdx-lookup/main.go
+++ b/skate/cmd/skate-cdx-lookup/main.go
@@ -36,8 +36,11 @@ func main() {
if *quiet {
log.SetOutput(ioutil.Discard)
}
- var cache = skate.Cache{Dir: *cacheDir}
- br := bufio.NewReader(r)
+ var (
+ cache = skate.Cache{Dir: *cacheDir}
+ br = bufio.NewReader(r)
+ i int
+ )
for {
line, err := br.ReadString('\n')
if err == io.EOF {
@@ -66,6 +69,7 @@ func main() {
if err != nil {
log.Fatal(err)
}
- fmt.Printf("% 10d %s\n", len(rows), line)
+ fmt.Printf("[%05d] % 10d %s %s\n", i, len(rows), rows.Summary(), line)
+ i++
}
}
diff --git a/skate/go.mod b/skate/go.mod
index 911296f..a3d7501 100644
--- a/skate/go.mod
+++ b/skate/go.mod
@@ -3,14 +3,14 @@ module git.archive.org/martin/cgraph/skate
go 1.15
require (
- github.com/adrg/xdg v0.3.3 // indirect
+ github.com/adrg/xdg v0.3.3
github.com/elastic/go-elasticsearch v0.0.0
github.com/elastic/go-elasticsearch/v7 v7.12.0
github.com/klauspost/cpuid/v2 v2.0.6 // indirect
github.com/matryer/is v1.4.0
github.com/nsf/jsondiff v0.0.0-20210303162244-6ea32392771e
github.com/segmentio/encoding v0.2.17
- github.com/sethgrid/pester v1.1.0 // indirect
+ github.com/sethgrid/pester v1.1.0
github.com/tidwall/gjson v1.7.5
golang.org/x/text v0.3.6
mvdan.cc/xurls/v2 v2.2.0
diff --git a/skate/go.sum b/skate/go.sum
index b6a108f..da37d40 100644
--- a/skate/go.sum
+++ b/skate/go.sum
@@ -1,5 +1,6 @@
github.com/adrg/xdg v0.3.3 h1:s/tV7MdqQnzB1nKY8aqHvAMD+uCiuEDzVB5HLRY849U=
github.com/adrg/xdg v0.3.3/go.mod h1:61xAR2VZcggl2St4O9ohF5qCKe08+JDmE4VNzPFQvOQ=
+github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/elastic/go-elasticsearch v0.0.0 h1:Pd5fqOuBxKxv83b0+xOAJDAkziWYwFinWnBO0y+TZaA=
github.com/elastic/go-elasticsearch v0.0.0/go.mod h1:TkBSJBuTyFdBnrNqoPc54FN0vKf5c04IdM4zuStJ7xg=
@@ -8,13 +9,16 @@ github.com/elastic/go-elasticsearch/v7 v7.12.0/go.mod h1:OJ4wdbtDNk5g503kvlHLyEr
github.com/klauspost/cpuid/v2 v2.0.5/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/matryer/is v1.4.0 h1:sosSmIWwkYITGrxZ25ULNDeKiMNzFSr4V/eqBQP0PeE=
github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU=
github.com/nsf/jsondiff v0.0.0-20210303162244-6ea32392771e h1:S+/ptYdZtpK/MDstwCyt+ZHdXEpz86RJZ5gyZU4txJY=
github.com/nsf/jsondiff v0.0.0-20210303162244-6ea32392771e/go.mod h1:uFMI8w+ref4v2r9jz+c9i1IfIttS/OkmLfrk1jne5hs=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/segmentio/encoding v0.2.17 h1:cgfmPc44u1po1lz5bSgF00gLCROBjDNc7h+H7I20zpc=
@@ -22,6 +26,7 @@ github.com/segmentio/encoding v0.2.17/go.mod h1:7E68jTSWMnNoYhHi1JbLd7NBSB6XfE4v
github.com/sethgrid/pester v1.1.0 h1:IyEAVvwSUPjs2ACFZkBe5N59BBUpSIkQ71Hr6cM5A+w=
github.com/sethgrid/pester v1.1.0/go.mod h1:Ad7IjTpvzZO8Fl0vh9AzQ+j/jYZfyp2diGwI8m5q+ns=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tidwall/gjson v1.7.5 h1:zmAN/xmX7OtpAkv4Ovfso60r/BiCi5IErCDYGNJu+uc=
github.com/tidwall/gjson v1.7.5/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk=
@@ -34,8 +39,10 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
mvdan.cc/xurls/v2 v2.2.0 h1:NSZPykBXJFCetGZykLAxaL6SIpvbVy/UFEniIfHAa8A=
mvdan.cc/xurls/v2 v2.2.0/go.mod h1:EV1RMtya9D6G5DMYPGD8zTQzaHet6Jh8gFlRgGRJeO8=