From cec830ea500b50e9d38b0d6193b22cd10f577370 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 20 May 2021 11:39:50 +0200 Subject: add slim cdx wrapper --- skate/cdx.go | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 skate/cdx.go diff --git a/skate/cdx.go b/skate/cdx.go new file mode 100644 index 0000000..c618c97 --- /dev/null +++ b/skate/cdx.go @@ -0,0 +1,74 @@ +package skate + +import ( + "fmt" + "io/ioutil" + "log" + "net/http" + "strings" + + "github.com/sethgrid/pester" +) + +const cdxApi = "http://web.archive.org/cdx/search/cdx" + +// CDX line, might add more fields later. +type CDX struct { + Surt string + Date string + Link string + ContentType string + StatusCode string + Checksum string + Size string +} + +// LookupCDX asks CDX API. Result will be like: +// net,ijmse)/uploadfile/2016/1214/20161214052559646.pdf 20170516210333 +// http://www.ijmse.net:80/uploadfile/2016/1214/20161214052559646.pdf +// application/pdf 200 PBPHE2OILTB43TAOUO33GBWLE2SS4LQX 2079755 +func LookupCDX(link string) (result []CDX, err error) { + link = prependSchema(link) + cdxlink := fmt.Sprintf("%s?url=%s", cdxApi, link) + log.Printf("[lookup] %s", cdxlink) + req, err := http.NewRequest("GET", cdxlink, nil) + if err != nil { + return nil, err + } + resp, err := pester.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + b, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, err + } + for _, line := range strings.Split(string(b), "\n") { + var fields = strings.Fields(line) + if len(fields) == 0 { + continue + } + if len(fields) < 7 { + log.Printf("short line: %s", line) + } + cdx := CDX{ + Surt: fields[0], + Date: fields[1], + Link: fields[2], + ContentType: fields[3], + StatusCode: fields[4], + Checksum: fields[5], + Size: fields[6], + } + result = append(result, cdx) + } + return result, nil +} + +func prependSchema(s string) string { + if strings.HasPrefix(s, "http") { + return s + } + return fmt.Sprintf("http://%s", s) +} -- cgit v1.2.3