summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2021-02-02 20:32:35 -0800
committerBryan Newbold <bnewbold@archive.org>2021-02-02 20:32:35 -0800
commit7e4942f8b88d9c580cdb9d46a9d7905b9be55849 (patch)
tree4b95595d24b796d3c5bbf1303168ea4977f9bbf5
parent9e069923e39746a892b1d35d3cc6796c6356725a (diff)
downloadfatcat-cli-7e4942f8b88d9c580cdb9d46a9d7905b9be55849.tar.gz
fatcat-cli-7e4942f8b88d9c580cdb9d46a9d7905b9be55849.zip
WIP on download, with progress bar
-rw-r--r--rust/Cargo.lock59
-rw-r--r--rust/fatcat-cli/Cargo.toml2
-rw-r--r--rust/fatcat-cli/src/download.rs63
-rw-r--r--rust/fatcat-cli/src/lib.rs4
-rw-r--r--rust/fatcat-cli/src/main.rs47
5 files changed, 169 insertions, 6 deletions
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 305eaf0..7f208c2 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -182,6 +182,21 @@ dependencies = [
]
[[package]]
+name = "console"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7cc80946b3480f421c2f17ed1cb841753a371c7c5104f51d507e13f532c856aa"
+dependencies = [
+ "encode_unicode",
+ "lazy_static",
+ "libc",
+ "regex",
+ "terminal_size",
+ "unicode-width",
+ "winapi 0.3.8",
+]
+
+[[package]]
name = "core-foundation"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -228,6 +243,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
[[package]]
+name = "encode_unicode"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
+
+[[package]]
name = "encoding_rs"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -261,6 +282,7 @@ dependencies = [
"env_logger",
"fatcat-openapi",
"hyper",
+ "indicatif",
"lazy_static",
"log 0.4.8",
"macaroon",
@@ -275,6 +297,7 @@ dependencies = [
"termcolor",
"tokio",
"toml",
+ "url",
]
[[package]]
@@ -736,6 +759,18 @@ dependencies = [
]
[[package]]
+name = "indicatif"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4"
+dependencies = [
+ "console",
+ "lazy_static",
+ "number_prefix",
+ "regex",
+]
+
+[[package]]
name = "iovec"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -991,6 +1026,12 @@ dependencies = [
]
[[package]]
+name = "number_prefix"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"
+
+[[package]]
name = "once_cell"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1272,9 +1313,9 @@ checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
[[package]]
name = "regex"
-version = "1.3.9"
+version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
+checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"
dependencies = [
"aho-corasick",
"memchr",
@@ -1284,9 +1325,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
-version = "0.6.18"
+version = "0.6.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
+checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"
[[package]]
name = "remove_dir_all"
@@ -1657,6 +1698,16 @@ dependencies = [
]
[[package]]
+name = "terminal_size"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86ca8ced750734db02076f44132d802af0b33b09942331f4459dde8636fd2406"
+dependencies = [
+ "libc",
+ "winapi 0.3.8",
+]
+
+[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/rust/fatcat-cli/Cargo.toml b/rust/fatcat-cli/Cargo.toml
index 30aefdb..dbb114a 100644
--- a/rust/fatcat-cli/Cargo.toml
+++ b/rust/fatcat-cli/Cargo.toml
@@ -36,6 +36,8 @@ serde = "1.0"
reqwest = { version = "0.10", features = ["blocking", "json"] }
chrono-humanize = "*"
tempfile = "3"
+indicatif = "0.15"
+url = "*"
[dev-dependencies]
diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs
new file mode 100644
index 0000000..c8c05fd
--- /dev/null
+++ b/rust/fatcat-cli/src/download.rs
@@ -0,0 +1,63 @@
+
+use anyhow::{anyhow, Context, Result};
+use indicatif::ProgressBar;
+use fatcat_openapi::models::FileEntity;
+use reqwest::header::USER_AGENT;
+use url::Url;
+use std::fs::File;
+
+/// Outcome of a single file download attempt, as returned by `download_file`.
+#[derive(Debug, PartialEq, Clone)]
+pub enum DownloadStatus {
+ // NOTE(review): presumably carries the path of a file that was already on disk; never constructed in this patch, confirm
+ Exists(String),
+ // NOTE(review): presumably carries the path of the freshly written file; never constructed in this patch, confirm
+ Downloaded(String),
+ /// The HTTP request returned a non-success status; the payload is the formatted status.
+ NetworkError(String),
+ // NOTE(review): meaning not visible in this patch (no accessible URL?), confirm
+ NoAccess,
+ /// Placeholder currently returned by `download_file` once the response body has been copied.
+ NotYet,
+}
+
+/// Rewrites a Wayback Machine capture URL so that the timestamp segment carries the `id_`
+/// modifier (which asks wayback for the original, un-rewritten capture body).
+///
+/// A URL is rewritten only when its path looks like `/web/<14-digit timestamp>/...`; any other
+/// URL is returned unchanged. Query string and fragment are left as they were.
+///
+/// eg, https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf
+///
+/// # Errors
+///
+/// Returns an error if the URL has no path segments (a "cannot-be-a-base" URL).
+fn rewrite_wayback_url(mut url: Url) -> Result<Url> {
+    // TODO: add tests
+    // `path_segments()` is `None` for cannot-be-a-base URLs; report that instead of panicking.
+    let mut segments: Vec<String> = url
+        .path_segments()
+        .ok_or_else(|| anyhow!("URL has no path segments: {}", url))?
+        .map(|x| x.to_string())
+        .collect();
+    // Check the length first so short paths (eg, just "/web") can't index out of bounds.
+    let is_capture = segments.len() >= 2
+        && segments[0] == "web"
+        && segments[1].len() == 14
+        && segments[1].chars().all(|c| c.is_ascii_digit());
+    if is_capture {
+        segments[1] = format!("{}id_", segments[1]);
+        // Write the modified segments back into the URL; previously they were built and then
+        // dropped, so the function always returned its input unchanged.
+        url.set_path(&segments.join("/"));
+    }
+    Ok(url)
+}
+
+/// Attempts to download a file entity into `<sha1>.pdf` in the current directory.
+///
+/// The first URL listed on the entity is fetched (wayback URLs are rewritten via
+/// `rewrite_wayback_url` first) and the response body is streamed to disk behind a progress bar.
+/// Checksum verification is not implemented yet.
+///
+/// Returns `DownloadStatus::NetworkError` when the server answers with a non-success status, and
+/// (for now) `DownloadStatus::NotYet` after the body has been written.
+///
+/// # Errors
+///
+/// Fails if the entity has no SHA-1 or no URLs, if the URL can't be parsed, if the HTTP request
+/// itself fails, or if the output file can't be created or written.
+pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {
+
+    // The SHA-1 names the output file, so it's required; error out instead of panicking.
+    // TODO: check if file already exists
+    let sha1 = fe
+        .sha1
+        .as_ref()
+        .ok_or_else(|| anyhow!("file entity has no SHA-1, can't name output file"))?;
+
+    // TODO: only archive.org URLs
+    // `first()` covers both a missing URL list and an empty one.
+    let raw_url = fe
+        .urls
+        .as_ref()
+        .and_then(|urls| urls.first())
+        .map(|u| u.url.clone())
+        .ok_or_else(|| anyhow!("file entity has no URLs to download from"))?;
+
+    let mut url = Url::parse(&raw_url).with_context(|| format!("parsing URL: {}", raw_url))?;
+    if url.host_str() == Some("web.archive.org") {
+        url = rewrite_wayback_url(url)?;
+    }
+
+    println!("downloading: {}", url);
+    let client = reqwest::blocking::Client::new();
+    let mut resp = client.get(url)
+        .header(USER_AGENT, "fatcat-cli/0.0.0")
+        .send()?;
+
+    // TODO: parse headers
+    // TODO: resp.error_for_status()?;
+    if !resp.status().is_success() {
+        return Ok(DownloadStatus::NetworkError(format!("{}", resp.status())));
+    }
+
+    // Create the output file only after the server answered successfully, so a failed request
+    // doesn't leave an empty `<sha1>.pdf` behind.
+    // TODO: open temporary file (real file plus suffix?)
+    let out_file = File::create(format!("{}.pdf", sha1))?;
+
+    // Progress bar length: catalog size if present and sane, else Content-Length, else 0.
+    // TODO: compare with resp.content_length() -> Option<u64>
+    let expected_size = fe
+        .size
+        .filter(|s| *s >= 0)
+        .map(|s| s as u64)
+        .or_else(|| resp.content_length())
+        .unwrap_or(0);
+    let pb = ProgressBar::new(expected_size);
+    resp.copy_to(&mut pb.wrap_write(out_file))?;
+
+    // TODO: verify size and checksum, then return `DownloadStatus::Downloaded`
+    Ok(DownloadStatus::NotYet)
+}
diff --git a/rust/fatcat-cli/src/lib.rs b/rust/fatcat-cli/src/lib.rs
index fc9f209..93c17fb 100644
--- a/rust/fatcat-cli/src/lib.rs
+++ b/rust/fatcat-cli/src/lib.rs
@@ -17,11 +17,13 @@ mod api;
mod entities;
mod search;
mod specifier;
+mod download;
pub use api::FatcatApiClient;
pub use entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation};
pub use search::crude_search;
pub use specifier::Specifier;
+pub use download::download_file;
// Want to show:
// - whether api_token found
@@ -138,7 +140,7 @@ impl FromStr for EntityType {
"container" => Ok(EntityType::Container),
"creator" => Ok(EntityType::Creator),
"file" => Ok(EntityType::File),
- "fileset" => Ok(EntityType::FileSet),
+ "fileset" => Ok(EntityType::FileSet),
"webcapture" => Ok(EntityType::WebCapture),
_ => Err(anyhow!("invalid entity type : {}", s)),
}
diff --git a/rust/fatcat-cli/src/main.rs b/rust/fatcat-cli/src/main.rs
index 046a825..75ddc6a 100644
--- a/rust/fatcat-cli/src/main.rs
+++ b/rust/fatcat-cli/src/main.rs
@@ -163,8 +163,10 @@ enum Command {
#[structopt(subcommand)]
cmd: EditgroupCommand,
},
+ Download {
+ specifier: Specifier,
+ },
//Changelog
- //Download
//History
Search {
entity_type: EntityType,
@@ -324,6 +326,49 @@ fn run(opt: Opt) -> Result<()> {
.context("updating after edit")?;
println!("{}", serde_json::to_string(&ee)?);
}
+ Command::Download{specifier} => {
+ // run lookups if necessary (inefficient)
+ let specifier = match specifier {
+ Specifier::ReleaseLookup(_, _) | Specifier::FileLookup(_, _) =>
+ specifier.into_entity_specifier(&mut api_client)?,
+ _ => specifier,
+ };
+ let file_entities = match specifier {
+ Specifier::Release(ident) => {
+ let result = api_client.rt.block_on(
+ api_client.api.get_release(ident.clone(), Some("files".to_string()), Some("abstracts,refs".to_string()))
+ )?;
+ let release_entity = match result {
+ fatcat_openapi::GetReleaseResponse::FoundEntity(model) => {
+ Ok(model)
+ },
+ resp => Err(anyhow!("{:?}", resp))
+ .with_context(|| format!("API GET failed: {:?}", ident)),
+ }?;
+ // TODO: not unwrap
+ release_entity.files.unwrap()
+ },
+ Specifier::File(ident) => {
+ let result = api_client.rt.block_on(
+ api_client.api.get_file(ident.clone(), None, None)
+ )?;
+ let file_entity = match result {
+ fatcat_openapi::GetFileResponse::FoundEntity(model) => {
+ Ok(model)
+ },
+ resp => Err(anyhow!("{:?}", resp))
+ .with_context(|| format!("API GET failed: {:?}", ident)),
+ }?;
+ vec![file_entity]
+ },
+ // TODO: not panic
+ _ => panic!("TODO: can only fetch file or release"),
+ };
+ for fe in file_entities {
+ let status = download_file(fe)?;
+ println!("{:?}", status);
+ };
+ }
Command::Search {
entity_type,
terms,