From 7e4942f8b88d9c580cdb9d46a9d7905b9be55849 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 2 Feb 2021 20:32:35 -0800 Subject: WIP on download, with progress bar --- rust/Cargo.lock | 59 +++++++++++++++++++++++++++++++++++--- rust/fatcat-cli/Cargo.toml | 2 ++ rust/fatcat-cli/src/download.rs | 63 +++++++++++++++++++++++++++++++++++++++++ rust/fatcat-cli/src/lib.rs | 4 ++- rust/fatcat-cli/src/main.rs | 47 +++++++++++++++++++++++++++++- 5 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 rust/fatcat-cli/src/download.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 305eaf0..7f208c2 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -181,6 +181,21 @@ dependencies = [ "vec_map", ] +[[package]] +name = "console" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cc80946b3480f421c2f17ed1cb841753a371c7c5104f51d507e13f532c856aa" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "regex", + "terminal_size", + "unicode-width", + "winapi 0.3.8", +] + [[package]] name = "core-foundation" version = "0.7.0" @@ -227,6 +242,12 @@ version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding_rs" version = "0.8.23" @@ -261,6 +282,7 @@ dependencies = [ "env_logger", "fatcat-openapi", "hyper", + "indicatif", "lazy_static", "log 0.4.8", "macaroon", @@ -275,6 +297,7 @@ dependencies = [ "termcolor", "tokio", "toml", + "url", ] [[package]] @@ -735,6 +758,18 @@ dependencies = [ "autocfg", ] +[[package]] +name = "indicatif" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4" +dependencies = [ + "console", + "lazy_static", + "number_prefix", + "regex", +] + [[package]] name = "iovec" version = "0.1.4" @@ -990,6 +1025,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" + [[package]] name = "once_cell" version = "1.5.2" @@ -1272,9 +1313,9 @@ checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" [[package]] name = "regex" -version = "1.3.9" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" dependencies = [ "aho-corasick", "memchr", @@ -1284,9 +1325,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.18" +version = "0.6.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" [[package]] name = "remove_dir_all" @@ -1656,6 +1697,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ca8ced750734db02076f44132d802af0b33b09942331f4459dde8636fd2406" +dependencies = [ + "libc", + "winapi 0.3.8", +] + [[package]] name = "textwrap" version = "0.11.0" diff --git a/rust/fatcat-cli/Cargo.toml b/rust/fatcat-cli/Cargo.toml index 30aefdb..dbb114a 100644 --- a/rust/fatcat-cli/Cargo.toml +++ b/rust/fatcat-cli/Cargo.toml @@ -36,6 +36,8 @@ serde = "1.0" reqwest = { version = "0.10", features = ["blocking", "json"] } chrono-humanize = "*" tempfile = "3" +indicatif = "0.15" +url = "*" 
[dev-dependencies] diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs new file mode 100644 index 0000000..c8c05fd --- /dev/null +++ b/rust/fatcat-cli/src/download.rs @@ -0,0 +1,63 @@ + +use anyhow::{anyhow, Context, Result}; +use indicatif::ProgressBar; +use fatcat_openapi::models::FileEntity; +use reqwest::header::USER_AGENT; +use url::Url; +use std::fs::File; + +#[derive(Debug, PartialEq, Clone)] +pub enum DownloadStatus { + Exists(String), + Downloaded(String), + NetworkError(String), + NoAccess, + NotYet, +} + +// eg, https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf +fn rewrite_wayback_url(mut url: Url) -> Result<Url> { + // TODO: add tests; appends "id_" to the 14-character timestamp segment, any other URL passes through unchanged + let segments: Vec<String> = url.path_segments().into_iter().flatten().map(|x| x.to_string()).collect(); + if segments.len() > 2 && segments[0] == "web" && segments[1].len() == 14 { + url.set_path(&format!("/web/{}id_/{}", segments[1], segments[2..].join("/"))); + } + Ok(url) +} + +/// Attempts to download a file entity (checksum verification is still TODO). +pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> { + + // TODO: check if file has sha1hex + // TODO: check if file already exists + + // TODO: only archive.org URLs + let raw_url = fe.urls.unwrap()[0].url.clone(); + + let mut url = Url::parse(&raw_url)?; + if url.host_str() == Some("web.archive.org") { + url = rewrite_wayback_url(url)?; + } + + // TODO: open temporary file (real file plus suffix?) + let out_file = File::create(format!("{}.pdf", fe.sha1.unwrap()))?; + + println!("downloading: {}", url); + let client = reqwest::blocking::Client::new(); + let mut resp = client.get(url) + .header(USER_AGENT, "fatcat-cli/0.0.0") + .send()?; + + // TODO: parse headers + // TODO: resp.error_for_status()?; + if !resp.status().is_success() { + return Ok(DownloadStatus::NetworkError(format!("{}", resp.status()))); + } + + // TODO: what if no filesize?
+ // TODO: compare with resp.content_length() -> Option<u64> + let pb = ProgressBar::new(fe.size.unwrap() as u64); + let out_size = resp.copy_to(&mut pb.wrap_write(out_file))?; + + Ok(DownloadStatus::NotYet) +} diff --git a/rust/fatcat-cli/src/lib.rs b/rust/fatcat-cli/src/lib.rs index fc9f209..93c17fb 100644 --- a/rust/fatcat-cli/src/lib.rs +++ b/rust/fatcat-cli/src/lib.rs @@ -17,11 +17,13 @@ mod api; mod entities; mod search; mod specifier; +mod download; pub use api::FatcatApiClient; pub use entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation}; pub use search::crude_search; pub use specifier::Specifier; +pub use download::download_file; // Want to show: // - whether api_token found @@ -138,7 +140,7 @@ impl FromStr for EntityType { "container" => Ok(EntityType::Container), "creator" => Ok(EntityType::Creator), "file" => Ok(EntityType::File), - "fileset" => Ok(EntityType::FileSet), + "fileset" => Ok(EntityType::FileSet), "webcapture" => Ok(EntityType::WebCapture), _ => Err(anyhow!("invalid entity type : {}", s)), } diff --git a/rust/fatcat-cli/src/main.rs b/rust/fatcat-cli/src/main.rs index 046a825..75ddc6a 100644 --- a/rust/fatcat-cli/src/main.rs +++ b/rust/fatcat-cli/src/main.rs @@ -163,8 +163,10 @@ enum Command { #[structopt(subcommand)] cmd: EditgroupCommand, }, + Download { + specifier: Specifier, + }, //Changelog - //Download //History Search { entity_type: EntityType, @@ -324,6 +326,49 @@ fn run(opt: Opt) -> Result<()> { .context("updating after edit")?; println!("{}", serde_json::to_string(&ee)?); } + Command::Download{specifier} => { + // run lookups if necessary (inefficient) + let specifier = match specifier { + Specifier::ReleaseLookup(_, _) | Specifier::FileLookup(_, _) => + specifier.into_entity_specifier(&mut api_client)?, + _ => specifier, + }; + let file_entities = match specifier { + Specifier::Release(ident) => { + let result = api_client.rt.block_on( + api_client.api.get_release(ident.clone(), Some("files".to_string()),
Some("abstracts,refs".to_string())) + )?; + let release_entity = match result { + fatcat_openapi::GetReleaseResponse::FoundEntity(model) => { + Ok(model) + }, + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", ident)), + }?; + // a release with no files attached simply yields nothing to download + release_entity.files.unwrap_or_default() + }, + Specifier::File(ident) => { + let result = api_client.rt.block_on( + api_client.api.get_file(ident.clone(), None, None) + )?; + let file_entity = match result { + fatcat_openapi::GetFileResponse::FoundEntity(model) => { + Ok(model) + }, + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", ident)), + }?; + vec![file_entity] + }, + // any other specifier is a user error, so report it instead of panicking + _ => return Err(anyhow!("can only fetch file or release")), + }; + for fe in file_entities { + let status = download_file(fe)?; + println!("{:?}", status); + }; + } Command::Search { entity_type, terms, -- cgit v1.2.3