diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-20 14:31:04 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-20 14:31:04 +0200 |
commit | 8714537ecffa0641516bc01b8cdc4cdd9a9d975c (patch) | |
tree | 9e7375daf4e677c00db35a23b3ca933609b6d9ea /skate/cdx.go | |
parent | 7be09009b42d3af96ca8875c698922710d92d074 (diff) | |
download | refcat-8714537ecffa0641516bc01b8cdc4cdd9a9d975c.tar.gz refcat-8714537ecffa0641516bc01b8cdc4cdd9a9d975c.zip |
wip: cdx lookup
Diffstat (limited to 'skate/cdx.go')
-rw-r--r-- | skate/cdx.go | 73 |
1 files changed, 68 insertions, 5 deletions
```diff
diff --git a/skate/cdx.go b/skate/cdx.go
index f98b781..ed394ed 100644
--- a/skate/cdx.go
+++ b/skate/cdx.go
@@ -5,15 +5,17 @@ import (
 	"io/ioutil"
 	"log"
 	"net/http"
+	"sort"
 	"strings"
+	"time"
 
 	"github.com/sethgrid/pester"
 )
 
 const cdxApi = "http://web.archive.org/cdx/search/cdx"
 
-// CDX line, might add more fields later.
-type CDX struct {
+// CDXLine line, might add more fields later.
+type CDXLine struct {
 	Surt string
 	Date string
 	Link string
@@ -23,13 +25,74 @@ type CDX struct {
 	Size string
 }
 
+type CDX []CDXLine
+
+type ByDate CDX
+
+func (b ByDate) Len() int {
+	return len(b)
+}
+
+func (b ByDate) Swap(i, j int) {
+	b[i], b[j] = b[j], b[i]
+}
+
+func (b ByDate) Less(i, j int) bool {
+	return b[i].Date < b[j].Date
+}
+
+func (c CDX) Summary() string {
+	var (
+		dateLast = "NA"
+		dateLastOK = "NA"
+		delta = "NA"
+	)
+	if len(c) == 0 {
+		return fmt.Sprintf("last=%s ok=%s", dateLast, dateLastOK)
+	}
+	sort.Sort(sort.Reverse(ByDate(c)))
+	dateLast = c[0].Date
+	for _, cdx := range c {
+		if cdx.StatusCode == "200" {
+			dateLastOK = cdx.Date
+			break
+		}
+	}
+	d, err := tsDiff(dateLast, dateLastOK)
+	if err == nil {
+		if d.Hours()/24 > 365 {
+			delta = fmt.Sprintf("\033[31;1;4m%0.0f\033[0m", d.Hours()/24)
+		} else {
+			delta = fmt.Sprintf("%0.0f", d.Hours()/24)
+		}
+	}
+	return fmt.Sprintf("last=%s ok=%s delta=%v", dateLast, dateLastOK, delta)
+}
+
+// tsDiff returns the duration between two timestamps, like: 20140304124333.
+func tsDiff(a, b string) (time.Duration, error) {
+	ta, err := time.Parse("20060102150405", a)
+	if err != nil {
+		return 0, err
+	}
+	tb, err := time.Parse("20060102150405", b)
+	if err != nil {
+		return 0, err
+	}
+	if ta.Before(tb) {
+		return tb.Sub(ta), nil
+	} else {
+		return ta.Sub(tb), nil
+	}
+}
+
 // LookupCDX asks CDX API. Result will be like:
 // net,ijmse)/uploadfile/2016/1214/20161214052559646.pdf 20170516210333
 // http://www.ijmse.net:80/uploadfile/2016/1214/20161214052559646.pdf
 // application/pdf 200 PBPHE2OILTB43TAOUO33GBWLE2SS4LQX 2079755
 //
 // Also returns the raw response body.
-func LookupCDX(link string) (result []CDX, b []byte, err error) {
+func LookupCDX(link string) (result CDX, b []byte, err error) {
 	link = prependSchema(link)
 	cdxlink := fmt.Sprintf("%s?url=%s", cdxApi, link)
 	log.Printf("[lookup] %s", cdxlink)
@@ -50,7 +113,7 @@ func LookupCDX(link string) (result []CDX, b []byte, err error) {
 	return result, b, err
 }
 
-func ParseCDX(b []byte) (result []CDX, err error) {
+func ParseCDX(b []byte) (result CDX, err error) {
 	for _, line := range strings.Split(string(b), "\n") {
 		var fields = strings.Fields(line)
 		if len(fields) == 0 {
@@ -60,7 +123,7 @@ func ParseCDX(b []byte) (result []CDX, err error) {
 			log.Printf("short line: %s", line)
 			continue
 		}
-		cdx := CDX{
+		cdx := CDXLine{
 			Surt: fields[0],
 			Date: fields[1],
 			Link: fields[2],
```