aboutsummaryrefslogtreecommitdiffstats
path: root/rust/fatcat-cli/src/download.rs
diff options
context:
space:
mode:
Diffstat (limited to 'rust/fatcat-cli/src/download.rs')
-rw-r--r--rust/fatcat-cli/src/download.rs63
1 files changed, 63 insertions, 0 deletions
diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs
new file mode 100644
index 0000000..c8c05fd
--- /dev/null
+++ b/rust/fatcat-cli/src/download.rs
@@ -0,0 +1,63 @@
+
+use anyhow::{anyhow, Context, Result};
+use indicatif::ProgressBar;
+use fatcat_openapi::models::FileEntity;
+use reqwest::header::USER_AGENT;
+use url::Url;
+use std::fs::File;
+
+/// Outcome reported by [`download_file`] for a single file entity.
+#[derive(Clone, Debug, PartialEq)]
+pub enum DownloadStatus {
+    /// NOTE(review): not constructed in this file yet; presumably the target file
+    /// was already present locally and the payload names it -- confirm with callers.
+    Exists(String),
+    /// NOTE(review): not constructed in this file yet; presumably the payload names
+    /// the freshly written file -- confirm with callers.
+    Downloaded(String),
+    /// The server answered with a non-success HTTP status; the payload is that
+    /// status rendered as text.
+    NetworkError(String),
+    /// NOTE(review): not constructed in this file yet; presumably means no usable
+    /// download location exists -- confirm with callers.
+    NoAccess,
+    /// Placeholder status currently returned once the response body has been
+    /// written to disk.
+    NotYet,
+}
+
+/// Rewrites a Wayback Machine capture URL by appending the `id_` flag to its
+/// timestamp segment.
+///
+/// For example,
+/// `https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf`
+/// becomes
+/// `https://web.archive.org/web/20140802044207id_/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf`.
+///
+/// NOTE(review): the `id_` flag is understood to request the capture's original
+/// bytes without Wayback's page rewriting -- confirm against the Wayback docs.
+///
+/// URLs whose path does not begin with `web/<14-digit timestamp>` (including
+/// ones that already carry a flag suffix) are returned unchanged. Query string
+/// and fragment are left untouched.
+///
+/// # Errors
+///
+/// Returns an error if the URL cannot be split into path segments (a
+/// "cannot-be-a-base" URL such as `mailto:` or `data:`).
+// TODO: add tests
+fn rewrite_wayback_url(mut url: Url) -> Result<Url> {
+    let mut segments: Vec<String> = url
+        .path_segments()
+        .ok_or_else(|| anyhow!("URL has no path segments: {}", url))?
+        .map(|x| x.to_string())
+        .collect();
+
+    // Check the length first so short paths (eg, "/") cannot cause an
+    // out-of-bounds panic when the segments are indexed.
+    let is_capture = segments.len() >= 2
+        && segments[0] == "web"
+        && segments[1].len() == 14
+        && segments[1].bytes().all(|b| b.is_ascii_digit());
+
+    if is_capture {
+        segments[1].push_str("id_");
+        // Write the modified segments back; without this the URL would be
+        // returned exactly as it came in.
+        url.set_path(&format!("/{}", segments.join("/")));
+    }
+    Ok(url)
+}
+
+/// Attempts to download the first URL of a file entity into `<sha1>.pdf`,
+/// relative to the current working directory.
+///
+/// URLs on `web.archive.org` are passed through [`rewrite_wayback_url`] before
+/// the request is sent. A progress bar tracks the transfer, sized from the
+/// entity's `size` when present, else from the response's content length, and
+/// falling back to a spinner when neither is known.
+///
+/// Returns `DownloadStatus::NetworkError` (carrying the HTTP status as text)
+/// when the server responds with a non-success status; in that case no output
+/// file is created. Otherwise the body is written to disk and the placeholder
+/// status `DownloadStatus::NotYet` is returned. No checksum or size
+/// verification is performed yet.
+///
+/// # Errors
+///
+/// Returns an error if the entity has no URLs or no SHA-1, if the URL cannot be
+/// parsed, if the HTTP request fails, or if the output file cannot be created
+/// or written.
+pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {
+    // TODO: check if file already exists
+
+    // TODO: only archive.org URLs
+    let raw_url = fe
+        .urls
+        .as_ref()
+        .and_then(|urls| urls.first())
+        .map(|u| u.url.clone())
+        .ok_or_else(|| anyhow!("file entity has no URLs to download from"))?;
+
+    // Validate the metadata needed for the output name before touching the
+    // network, so incomplete entities fail fast instead of panicking later.
+    let sha1 = fe
+        .sha1
+        .as_ref()
+        .ok_or_else(|| anyhow!("file entity has no SHA-1; cannot name output file"))?;
+    let out_path = format!("{}.pdf", sha1);
+
+    let mut url =
+        Url::parse(&raw_url).with_context(|| format!("parsing download URL: {}", raw_url))?;
+    if url.host_str() == Some("web.archive.org") {
+        url = rewrite_wayback_url(url)?;
+    }
+
+    println!("downloading: {}", url);
+    let client = reqwest::blocking::Client::new();
+    let mut resp = client
+        .get(url)
+        .header(USER_AGENT, "fatcat-cli/0.0.0")
+        .send()?;
+
+    // TODO: parse headers
+    if !resp.status().is_success() {
+        return Ok(DownloadStatus::NetworkError(resp.status().to_string()));
+    }
+
+    // Create the output file only once the server has agreed to send a body,
+    // so failed requests do not leave an empty file behind.
+    // TODO: open temporary file (real file plus suffix?)
+    let out_file = File::create(&out_path)
+        .with_context(|| format!("creating output file: {}", out_path))?;
+
+    // Prefer the catalog size, fall back to Content-Length; a negative catalog
+    // size is treated as unknown rather than wrapped into a huge u64.
+    // TODO: compare with resp.content_length() -> Option<u64>
+    let expected_len = fe
+        .size
+        .filter(|s| *s >= 0)
+        .map(|s| s as u64)
+        .or_else(|| resp.content_length());
+    let pb = match expected_len {
+        Some(len) => ProgressBar::new(len),
+        None => ProgressBar::new_spinner(),
+    };
+    resp.copy_to(&mut pb.wrap_write(out_file))?;
+
+    Ok(DownloadStatus::NotYet)
+}