diff options
Diffstat (limited to 'rust/fatcat-cli/src/download.rs')
-rw-r--r-- | rust/fatcat-cli/src/download.rs | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs new file mode 100644 index 0000000..c8c05fd --- /dev/null +++ b/rust/fatcat-cli/src/download.rs @@ -0,0 +1,63 @@ + +use anyhow::{anyhow, Context, Result}; +use indicatif::ProgressBar; +use fatcat_openapi::models::FileEntity; +use reqwest::header::USER_AGENT; +use url::Url; +use std::fs::File; + +#[derive(Debug, PartialEq, Clone)] +pub enum DownloadStatus { + Exists(String), + Downloaded(String), + NetworkError(String), + NoAccess, + NotYet, +} + +// eg, https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf +fn rewrite_wayback_url(url: Url) -> Result<Url> { + // TODO: make this function correct, and add tests + let mut segments: Vec<String> = url.path_segments().unwrap().map(|x| x.to_string()).collect(); + if segments[0] == "web" && segments[1].len() == 14 { + segments[1] = format!("{}id_", segments[1]); + } + Ok(url) +} + +/// Attempts to download a file entity, including verifying checksum. +pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> { + + // TODO: check if file has sha1hex + // TODO: check if file already exists + + // TODO: only archive.org URLs + let raw_url = fe.urls.unwrap()[0].url.clone(); + + let mut url = Url::parse(&raw_url)?; + if url.host_str() == Some("web.archive.org") { + url = rewrite_wayback_url(url)?; + } + + // TODO: open temporary file (real file plus suffix?) + let out_file = File::create(format!("{}.pdf", fe.sha1.unwrap()))?; + + println!("downloading: {}", url); + let client = reqwest::blocking::Client::new(); + let mut resp = client.get(url) + .header(USER_AGENT, "fatcat-cli/0.0.0") + .send()?; + + // TODO: parse headers + // TODO: resp.error_for_status()?; + if !resp.status().is_success() { + return Ok(DownloadStatus::NetworkError(format!("{}", resp.status()))); + } + + // TODO: what if no filesize? + // TODO: compare with resp.content_length(() -> Option<u64> + let pb = ProgressBar::new(fe.size.unwrap() as u64); + let out_size = resp.copy_to(&mut pb.wrap_write(out_file))?; + + Ok(DownloadStatus::NotYet) +} |