aboutsummaryrefslogtreecommitdiffstats
path: root/rust/fatcat-cli/src/download.rs
blob: c8c05fdfbd3d6d1138067063454b3d2f9dc71bd8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

use anyhow::{anyhow, Context, Result};
use indicatif::ProgressBar;
use fatcat_openapi::models::FileEntity;
use reqwest::header::USER_AGENT;
use url::Url;
use std::fs::File;

/// Outcome of an attempt to fetch the content of a file entity.
///
/// Variants that carry a `String` use it for a human-readable detail; only the
/// `NetworkError` payload is populated by code visible in this file.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum DownloadStatus {
    /// The file is already present locally.
    /// NOTE(review): payload is presumably the local path -- confirm.
    Exists(String),
    /// The file was fetched.
    /// NOTE(review): payload is presumably the local path -- confirm.
    Downloaded(String),
    /// The remote server could not deliver the file; `download_file` stores the
    /// textual HTTP status of the response here.
    NetworkError(String),
    /// NOTE(review): presumably "no usable/public URL for this file" -- confirm.
    NoAccess,
    /// NOTE(review): presumably a placeholder for "not implemented/finished
    /// yet" -- confirm.
    NotYet,
}

/// Rewrites a Wayback Machine capture URL so that the timestamp segment carries
/// the `id_` modifier.
///
/// eg, `https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf`
/// becomes `https://web.archive.org/web/20140802044207id_/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf`
///
/// NOTE(review): per Wayback Machine conventions the `id_` modifier requests
/// the original, un-rewritten capture bytes -- confirm against upstream docs.
///
/// Only paths of the form `/web/<14 ASCII digits>/...` are rewritten; any other
/// URL (including one that already has a modifier, or one that cannot be a
/// base and therefore has no path segments) is returned unchanged. The query
/// string and fragment are left untouched.
///
/// Currently never returns `Err`; the `Result` is kept so callers using `?`
/// keep working.
fn rewrite_wayback_url(mut url: Url) -> Result<Url> {
    // TODO: add tests
    // Own the segments so the borrow of `url` ends before it is mutated.
    // `path_segments()` is `None` for cannot-be-a-base URLs; treat as "no segments".
    let mut segments: Vec<String> = url
        .path_segments()
        .map(|parts| parts.map(str::to_owned).collect())
        .unwrap_or_default();

    let is_capture = segments.len() >= 2
        && segments[0] == "web"
        && segments[1].len() == 14
        && segments[1].bytes().all(|b| b.is_ascii_digit());

    if is_capture {
        segments[1].push_str("id_");
        // Segments are already percent-encoded as they appeared in the URL, so
        // write the path back as a whole string. `path_segments_mut().push()`
        // would re-encode existing `%` escapes.
        url.set_path(&format!("/{}", segments.join("/")));
    }
    Ok(url)
}

/// Attempts to download a file entity, including verifying checksum.
pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {

    // TODO: check if file has sha1hex
    // TODO: check if file already exists

    // TODO: only archive.org URLs
    let raw_url = fe.urls.unwrap()[0].url.clone();

    let mut url = Url::parse(&raw_url)?;
    if url.host_str() == Some("web.archive.org") {
        url = rewrite_wayback_url(url)?;
    }

    // TODO: open temporary file (real file plus suffix?)
    let out_file = File::create(format!("{}.pdf", fe.sha1.unwrap()))?;

    println!("downloading: {}", url);
    let client = reqwest::blocking::Client::new();
    let mut resp = client.get(url)
        .header(USER_AGENT, "fatcat-cli/0.0.0")
        .send()?;

    // TODO: parse headers
    // TODO: resp.error_for_status()?;
    if !resp.status().is_success() {
        return Ok(DownloadStatus::NetworkError(format!("{}", resp.status())));
    }

    // TODO: what if no filesize?
    // TODO: compare with resp.content_length(() -> Option<u64>
    let pb = ProgressBar::new(fe.size.unwrap() as u64);
    let out_size = resp.copy_to(&mut pb.wrap_write(out_file))?;

    Ok(DownloadStatus::NotYet)
}