package skate

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"sort"
	"strings"
	"time"

	"github.com/sethgrid/pester"
)

const cdxApi = "http://web.archive.org/cdx/search/cdx"

// CDXLine line, might add more fields later. Fields are kept as the raw
// strings found on a response line, in the order ParseCDX reads them.
type CDXLine struct {
	Surt        string
	Date        string
	Link        string
	ContentType string
	StatusCode  string
	Checksum    string
	Size        string
}

// CDX is a list of CDX lines, e.g. all captures returned for a single lookup.
type CDX []CDXLine

// ByDate implements sort.Interface for CDX, ordering lines by their Date
// string in ascending order. Plain string comparison matches chronological
// order for fixed-width timestamps such as 20140304124333.
type ByDate CDX

func (b ByDate) Len() int           { return len(b) }
func (b ByDate) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
func (b ByDate) Less(i, j int) bool { return b[i].Date < b[j].Date }

// Summary returns a one-line overview of the captures: the most recent date
// ("last"), the most recent date with status code "200" ("ok") and the
// distance between the two in days ("delta"). Values that cannot be determined
// are reported as "NA". A delta of more than 365 days is wrapped in ANSI
// escape codes to highlight it on a terminal. For an empty list only "last"
// and "ok" are reported.
//
// The receiver is not modified.
func (c CDX) Summary() string {
	var (
		dateLast   = "NA"
		dateLastOK = "NA"
		delta      = "NA"
	)
	if len(c) == 0 {
		return fmt.Sprintf("last=%s ok=%s", dateLast, dateLastOK)
	}
	// Sort a private copy, newest first, so that computing a summary does not
	// reorder the caller's slice as a side effect.
	sorted := make(CDX, len(c))
	copy(sorted, c)
	sort.Sort(sort.Reverse(ByDate(sorted)))
	dateLast = sorted[0].Date
	for _, line := range sorted {
		if line.StatusCode == "200" {
			dateLastOK = line.Date
			break
		}
	}
	// tsDiff fails if either value is not a timestamp (e.g. no capture with
	// status 200 exists and dateLastOK is still "NA"); delta stays "NA" then.
	if d, err := tsDiff(dateLast, dateLastOK); err == nil {
		days := d.Hours() / 24
		if days > 365 {
			delta = fmt.Sprintf("\033[31;1;4m%0.0f\033[0m", days)
		} else {
			delta = fmt.Sprintf("%0.0f", days)
		}
	}
	return fmt.Sprintf("last=%s ok=%s delta=%v", dateLast, dateLastOK, delta)
}

// tsDiff returns the duration between two timestamps, like: 20140304124333.
// The result is never negative, regardless of argument order. An error is
// returned if either timestamp cannot be parsed.
func tsDiff(a, b string) (time.Duration, error) {
	const layout = "20060102150405"
	ta, err := time.Parse(layout, a)
	if err != nil {
		return 0, err
	}
	tb, err := time.Parse(layout, b)
	if err != nil {
		return 0, err
	}
	if ta.Before(tb) {
		return tb.Sub(ta), nil
	}
	return ta.Sub(tb), nil
}

// LookupCDX asks CDX API. Result will be like:
//
// net,ijmse)/uploadfile/2016/1214/20161214052559646.pdf 20170516210333
// http://www.ijmse.net:80/uploadfile/2016/1214/20161214052559646.pdf
// application/pdf 200 PBPHE2OILTB43TAOUO33GBWLE2SS4LQX 2079755
//
// Also returns the raw response body.
func LookupCDX(link string) (result CDX, b []byte, err error) { link = prependSchema(link) cdxlink := fmt.Sprintf("%s?url=%s", cdxApi, link) log.Printf("[lookup] %s", cdxlink) req, err := http.NewRequest("GET", cdxlink, nil) if err != nil { return nil, b, err } resp, err := pester.Do(req) if err != nil { return nil, b, err } defer resp.Body.Close() if resp.StatusCode >= 400 { return nil, nil, fmt.Errorf("api returned HTTP %v", resp.StatusCode) } b, err = ioutil.ReadAll(resp.Body) if err != nil { return nil, b, err } result, err = ParseCDX(b) return result, b, err } func ParseCDX(b []byte) (result CDX, err error) { for _, line := range strings.Split(string(b), "\n") { var fields = strings.Fields(line) if len(fields) == 0 { continue } if len(fields) < 7 { log.Printf("short line: %s", line) continue } cdx := CDXLine{ Surt: fields[0], Date: fields[1], Link: fields[2], ContentType: fields[3], StatusCode: fields[4], Checksum: fields[5], Size: fields[6], } result = append(result, cdx) } return result, nil } func prependSchema(s string) string { if strings.HasPrefix(s, "http") { return s } return fmt.Sprintf("http://%s", s) }