diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2021-02-02 20:32:35 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2021-02-02 20:32:35 -0800 | 
| commit | 7e4942f8b88d9c580cdb9d46a9d7905b9be55849 (patch) | |
| tree | 4b95595d24b796d3c5bbf1303168ea4977f9bbf5 /rust | |
| parent | 9e069923e39746a892b1d35d3cc6796c6356725a (diff) | |
| download | fatcat-cli-7e4942f8b88d9c580cdb9d46a9d7905b9be55849.tar.gz fatcat-cli-7e4942f8b88d9c580cdb9d46a9d7905b9be55849.zip | |
WIP on download, with progress bar
Diffstat (limited to 'rust')
| -rw-r--r-- | rust/Cargo.lock | 59 | ||||
| -rw-r--r-- | rust/fatcat-cli/Cargo.toml | 2 | ||||
| -rw-r--r-- | rust/fatcat-cli/src/download.rs | 63 | ||||
| -rw-r--r-- | rust/fatcat-cli/src/lib.rs | 4 | ||||
| -rw-r--r-- | rust/fatcat-cli/src/main.rs | 47 | 
5 files changed, 169 insertions, 6 deletions
| diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 305eaf0..7f208c2 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -182,6 +182,21 @@ dependencies = [  ]  [[package]] +name = "console" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cc80946b3480f421c2f17ed1cb841753a371c7c5104f51d507e13f532c856aa" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "regex", + "terminal_size", + "unicode-width", + "winapi 0.3.8", +] + +[[package]]  name = "core-foundation"  version = "0.7.0"  source = "registry+https://github.com/rust-lang/crates.io-index" @@ -228,6 +243,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"  checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"  [[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]]  name = "encoding_rs"  version = "0.8.23"  source = "registry+https://github.com/rust-lang/crates.io-index" @@ -261,6 +282,7 @@ dependencies = [   "env_logger",   "fatcat-openapi",   "hyper", + "indicatif",   "lazy_static",   "log 0.4.8",   "macaroon", @@ -275,6 +297,7 @@ dependencies = [   "termcolor",   "tokio",   "toml", + "url",  ]  [[package]] @@ -736,6 +759,18 @@ dependencies = [  ]  [[package]] +name = "indicatif" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4" +dependencies = [ + "console", + "lazy_static", + "number_prefix", + "regex", +] + +[[package]]  name = "iovec"  version = "0.1.4"  source = "registry+https://github.com/rust-lang/crates.io-index" @@ -991,6 +1026,12 @@ dependencies = [  ]  [[package]] +name = "number_prefix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" + +[[package]]  name = "once_cell"  version = "1.5.2"  source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1272,9 +1313,9 @@ checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"  [[package]]  name = "regex" -version = "1.3.9" +version = "1.4.3"  source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"  dependencies = [   "aho-corasick",   "memchr", @@ -1284,9 +1325,9 @@ dependencies = [  [[package]]  name = "regex-syntax" -version = "0.6.18" +version = "0.6.22"  source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"  [[package]]  name = "remove_dir_all" @@ -1657,6 +1698,16 @@ dependencies = [  ]  [[package]] +name = "terminal_size" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ca8ced750734db02076f44132d802af0b33b09942331f4459dde8636fd2406" +dependencies = [ + "libc", + "winapi 0.3.8", +] + +[[package]]  name = "textwrap"  version = "0.11.0"  source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/rust/fatcat-cli/Cargo.toml b/rust/fatcat-cli/Cargo.toml index 30aefdb..dbb114a 100644 --- a/rust/fatcat-cli/Cargo.toml +++ b/rust/fatcat-cli/Cargo.toml @@ -36,6 +36,8 @@ serde = "1.0"  reqwest = { version = "0.10", features = ["blocking", "json"] }  chrono-humanize = "*"  tempfile = "3" +indicatif = "0.15" +url = "*"  [dev-dependencies] diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs new file mode 100644 index 0000000..c8c05fd --- /dev/null +++ b/rust/fatcat-cli/src/download.rs @@ -0,0 +1,63 @@ + +use anyhow::{anyhow, Context, 
Result}; +use indicatif::ProgressBar; +use fatcat_openapi::models::FileEntity; +use reqwest::header::USER_AGENT; +use url::Url; +use std::fs::File; + +#[derive(Debug, PartialEq, Clone)] +pub enum DownloadStatus { +    Exists(String), +    Downloaded(String), +    NetworkError(String), +    NoAccess, +    NotYet, +} + +// eg, https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf +fn rewrite_wayback_url(url: Url) -> Result<Url> { +    // TODO: make this function correct, and add tests +    let mut segments: Vec<String> = url.path_segments().unwrap().map(|x| x.to_string()).collect(); +    if segments[0] == "web" && segments[1].len() == 14 { +        segments[1] = format!("{}id_", segments[1]); +    } +    Ok(url) +} + +/// Attempts to download a file entity, including verifying checksum. +pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> { + +    // TODO: check if file has sha1hex +    // TODO: check if file already exists + +    // TODO: only archive.org URLs +    let raw_url = fe.urls.unwrap()[0].url.clone(); + +    let mut url = Url::parse(&raw_url)?; +    if url.host_str() == Some("web.archive.org") { +        url = rewrite_wayback_url(url)?; +    } + +    // TODO: open temporary file (real file plus suffix?) +    let out_file = File::create(format!("{}.pdf", fe.sha1.unwrap()))?; + +    println!("downloading: {}", url); +    let client = reqwest::blocking::Client::new(); +    let mut resp = client.get(url) +        .header(USER_AGENT, "fatcat-cli/0.0.0") +        .send()?; + +    // TODO: parse headers +    // TODO: resp.error_for_status()?; +    if !resp.status().is_success() { +        return Ok(DownloadStatus::NetworkError(format!("{}", resp.status()))); +    } + +    // TODO: what if no filesize? 
+    // TODO: compare with resp.content_length(() -> Option<u64> +    let pb = ProgressBar::new(fe.size.unwrap() as u64); +    let out_size = resp.copy_to(&mut pb.wrap_write(out_file))?; + +    Ok(DownloadStatus::NotYet) +} diff --git a/rust/fatcat-cli/src/lib.rs b/rust/fatcat-cli/src/lib.rs index fc9f209..93c17fb 100644 --- a/rust/fatcat-cli/src/lib.rs +++ b/rust/fatcat-cli/src/lib.rs @@ -17,11 +17,13 @@ mod api;  mod entities;  mod search;  mod specifier; +mod download;  pub use api::FatcatApiClient;  pub use entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation};  pub use search::crude_search;  pub use specifier::Specifier; +pub use download::download_file;  // Want to show:  // - whether api_token found @@ -138,7 +140,7 @@ impl FromStr for EntityType {              "container" => Ok(EntityType::Container),              "creator" => Ok(EntityType::Creator),              "file" => Ok(EntityType::File), -            "fileset" => Ok(EntityType::FileSet), +            "fileset" => Ok(EntityType::FileSet),              "webcapture" => Ok(EntityType::WebCapture),              _ => Err(anyhow!("invalid entity type : {}", s)),          } diff --git a/rust/fatcat-cli/src/main.rs b/rust/fatcat-cli/src/main.rs index 046a825..75ddc6a 100644 --- a/rust/fatcat-cli/src/main.rs +++ b/rust/fatcat-cli/src/main.rs @@ -163,8 +163,10 @@ enum Command {          #[structopt(subcommand)]          cmd: EditgroupCommand,      }, +    Download { +        specifier: Specifier, +    },      //Changelog -    //Download      //History      Search {          entity_type: EntityType, @@ -324,6 +326,49 @@ fn run(opt: Opt) -> Result<()> {                  .context("updating after edit")?;              println!("{}", serde_json::to_string(&ee)?);          } +        Command::Download{specifier} => { +            // run lookups if necessary (inefficient) +            let specifier = match specifier { +                Specifier::ReleaseLookup(_, _) | Specifier::FileLookup(_, _) =>  +       
             specifier.into_entity_specifier(&mut api_client)?, +                _ => specifier, +            }; +            let file_entities = match specifier { +                Specifier::Release(ident) => { +                    let result = api_client.rt.block_on( +                        api_client.api.get_release(ident.clone(), Some("files".to_string()), Some("abstracts,refs".to_string())) +                    )?; +                    let release_entity = match result { +                        fatcat_openapi::GetReleaseResponse::FoundEntity(model) => { +                            Ok(model) +                        }, +                        resp => Err(anyhow!("{:?}", resp)) +                            .with_context(|| format!("API GET failed: {:?}", ident)), +                    }?; +                    // TODO: not unwrap +                    release_entity.files.unwrap() +                }, +                Specifier::File(ident) => { +                    let result = api_client.rt.block_on( +                        api_client.api.get_file(ident.clone(), None, None) +                    )?; +                    let file_entity = match result { +                        fatcat_openapi::GetFileResponse::FoundEntity(model) => { +                            Ok(model) +                        }, +                        resp => Err(anyhow!("{:?}", resp)) +                            .with_context(|| format!("API GET failed: {:?}", ident)), +                    }?; +                    vec![file_entity] +                }, +                // TODO: not panic +                _ => panic!("TODO: can only fetch file or release"), +            }; +            for fe in file_entities { +                let status = download_file(fe)?; +                println!("{:?}", status); +            }; +        }          Command::Search {              entity_type,              terms, | 
