aboutsummaryrefslogtreecommitdiffstats
path: root/skate/cdx.go
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-05-20 14:31:04 +0200
committerMartin Czygan <martin.czygan@gmail.com>2021-05-20 14:31:04 +0200
commit8714537ecffa0641516bc01b8cdc4cdd9a9d975c (patch)
tree9e7375daf4e677c00db35a23b3ca933609b6d9ea /skate/cdx.go
parent7be09009b42d3af96ca8875c698922710d92d074 (diff)
downloadrefcat-8714537ecffa0641516bc01b8cdc4cdd9a9d975c.tar.gz
refcat-8714537ecffa0641516bc01b8cdc4cdd9a9d975c.zip
wip: cdx lookup
Diffstat (limited to 'skate/cdx.go')
-rw-r--r--skate/cdx.go73
1 files changed, 68 insertions, 5 deletions
diff --git a/skate/cdx.go b/skate/cdx.go
index f98b781..ed394ed 100644
--- a/skate/cdx.go
+++ b/skate/cdx.go
@@ -5,15 +5,17 @@ import (
"io/ioutil"
"log"
"net/http"
+ "sort"
"strings"
+ "time"
"github.com/sethgrid/pester"
)
const cdxApi = "http://web.archive.org/cdx/search/cdx"
-// CDX line, might add more fields later.
-type CDX struct {
+// CDXLine is a single parsed line of a CDX API response; more fields might be added later.
+type CDXLine struct {
Surt string
Date string
Link string
@@ -23,13 +25,74 @@ type CDX struct {
Size string
}
+type CDX []CDXLine
+
+type ByDate CDX
+
+func (b ByDate) Len() int {
+ return len(b)
+}
+
+func (b ByDate) Swap(i, j int) {
+ b[i], b[j] = b[j], b[i]
+}
+
+func (b ByDate) Less(i, j int) bool {
+ return b[i].Date < b[j].Date
+}
+
+func (c CDX) Summary() string {
+ var (
+ dateLast = "NA"
+ dateLastOK = "NA"
+ delta = "NA"
+ )
+ if len(c) == 0 {
+ return fmt.Sprintf("last=%s ok=%s", dateLast, dateLastOK)
+ }
+ sort.Sort(sort.Reverse(ByDate(c)))
+ dateLast = c[0].Date
+ for _, cdx := range c {
+ if cdx.StatusCode == "200" {
+ dateLastOK = cdx.Date
+ break
+ }
+ }
+ d, err := tsDiff(dateLast, dateLastOK)
+ if err == nil {
+ if d.Hours()/24 > 365 {
+ delta = fmt.Sprintf("\033[31;1;4m%0.0f\033[0m", d.Hours()/24)
+ } else {
+ delta = fmt.Sprintf("%0.0f", d.Hours()/24)
+ }
+ }
+ return fmt.Sprintf("last=%s ok=%s delta=%v", dateLast, dateLastOK, delta)
+}
+
+// tsDiff returns the absolute (non-negative) duration between two 14-digit timestamps (YYYYMMDDhhmmss), like: 20140304124333.
+func tsDiff(a, b string) (time.Duration, error) {
+ ta, err := time.Parse("20060102150405", a)
+ if err != nil {
+ return 0, err
+ }
+ tb, err := time.Parse("20060102150405", b)
+ if err != nil {
+ return 0, err
+ }
+ if ta.Before(tb) {
+ return tb.Sub(ta), nil
+ } else {
+ return ta.Sub(tb), nil
+ }
+}
+
// LookupCDX asks CDX API. Result will be like:
// net,ijmse)/uploadfile/2016/1214/20161214052559646.pdf 20170516210333
// http://www.ijmse.net:80/uploadfile/2016/1214/20161214052559646.pdf
// application/pdf 200 PBPHE2OILTB43TAOUO33GBWLE2SS4LQX 2079755
//
// Also returns the raw response body.
-func LookupCDX(link string) (result []CDX, b []byte, err error) {
+func LookupCDX(link string) (result CDX, b []byte, err error) {
link = prependSchema(link)
cdxlink := fmt.Sprintf("%s?url=%s", cdxApi, link)
log.Printf("[lookup] %s", cdxlink)
@@ -50,7 +113,7 @@ func LookupCDX(link string) (result []CDX, b []byte, err error) {
return result, b, err
}
-func ParseCDX(b []byte) (result []CDX, err error) {
+func ParseCDX(b []byte) (result CDX, err error) {
for _, line := range strings.Split(string(b), "\n") {
var fields = strings.Fields(line)
if len(fields) == 0 {
@@ -60,7 +123,7 @@ func ParseCDX(b []byte) (result []CDX, err error) {
log.Printf("short line: %s", line)
continue
}
- cdx := CDX{
+ cdx := CDXLine{
Surt: fields[0],
Date: fields[1],
Link: fields[2],