aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/cdx.go74
1 files changed, 74 insertions, 0 deletions
diff --git a/skate/cdx.go b/skate/cdx.go
new file mode 100644
index 0000000..c618c97
--- /dev/null
+++ b/skate/cdx.go
@@ -0,0 +1,74 @@
+package skate
+
+import (
+ "fmt"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "strings"
+
+ "github.com/sethgrid/pester"
+)
+
+const cdxApi = "http://web.archive.org/cdx/search/cdx" // CDX search endpoint; LookupCDX appends the looked-up link as the "url" query parameter.
+
+// CDX holds the whitespace-separated columns of a single CDX result line, all kept as raw strings; more fields might be added later.
+type CDX struct {
+ Surt string // column 1, e.g. "net,ijmse)/uploadfile/2016/1214/20161214052559646.pdf"
+ Date string // column 2, e.g. "20170516210333" (presumably a YYYYMMDDhhmmss capture timestamp; confirm)
+ Link string // column 3, e.g. "http://www.ijmse.net:80/uploadfile/2016/1214/20161214052559646.pdf"
+ ContentType string // column 4, e.g. "application/pdf"
+ StatusCode string // column 5, e.g. "200"; not parsed into an int
+ Checksum string // column 6, e.g. "PBPHE2OILTB43TAOUO33GBWLE2SS4LQX"
+ Size string // column 7, e.g. "2079755"; not parsed into an int
+}
+
+// LookupCDX asks CDX API. Result will be like:
+// net,ijmse)/uploadfile/2016/1214/20161214052559646.pdf 20170516210333
+// http://www.ijmse.net:80/uploadfile/2016/1214/20161214052559646.pdf
+// application/pdf 200 PBPHE2OILTB43TAOUO33GBWLE2SS4LQX 2079755
+func LookupCDX(link string) (result []CDX, err error) {
+ link = prependSchema(link)
+ cdxlink := fmt.Sprintf("%s?url=%s", cdxApi, link)
+ log.Printf("[lookup] %s", cdxlink)
+ req, err := http.NewRequest("GET", cdxlink, nil)
+ if err != nil {
+ return nil, err
+ }
+ resp, err := pester.Do(req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+ b, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ return nil, err
+ }
+ for _, line := range strings.Split(string(b), "\n") {
+ var fields = strings.Fields(line)
+ if len(fields) == 0 {
+ continue
+ }
+ if len(fields) < 7 {
+ log.Printf("short line: %s", line)
+ }
+ cdx := CDX{
+ Surt: fields[0],
+ Date: fields[1],
+ Link: fields[2],
+ ContentType: fields[3],
+ StatusCode: fields[4],
+ Checksum: fields[5],
+ Size: fields[6],
+ }
+ result = append(result, cdx)
+ }
+ return result, nil
+}
+
// prependSchema returns s unchanged when it already starts with an "http://"
// or "https://" scheme (matched case-insensitively) and otherwise returns s
// prefixed with "http://". Checking for the full scheme, not just the letters
// "http", keeps scheme-less hosts such as "httpbin.org" from being mistaken
// for complete URLs.
func prependSchema(s string) string {
	lower := strings.ToLower(s)
	if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
		return s
	}
	return "http://" + s
}