From 19535c6c6ebcd671ad9582d6502734320618876a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 4 Feb 2021 23:22:11 -0800 Subject: improve download routines; move 'edit' to commands.rs --- rust/fatcat-cli/src/commands.rs | 46 +++++++++++++++++++++++-- rust/fatcat-cli/src/download.rs | 75 ++++++++++++++++++++++++++++++++++++----- rust/fatcat-cli/src/lib.rs | 4 +-- rust/fatcat-cli/src/main.rs | 57 ++++--------------------------- 4 files changed, 119 insertions(+), 63 deletions(-) diff --git a/rust/fatcat-cli/src/commands.rs b/rust/fatcat-cli/src/commands.rs index c0000c7..30fa0c4 100644 --- a/rust/fatcat-cli/src/commands.rs +++ b/rust/fatcat-cli/src/commands.rs @@ -9,8 +9,8 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use crate::api::FatcatApiClient; //use crate::download::download_file; -use crate::entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation}; -//use crate::specifier::Specifier; +use crate::entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation, read_entity_file}; +use crate::specifier::Specifier; // Want to show: // - whether api_token found @@ -270,3 +270,45 @@ pub fn print_entity_histories( } Ok(()) } + +pub fn edit_entity_locally(api_client: &mut FatcatApiClient, specifier: Specifier, editgroup_id: String, json: bool, editing_command: String) -> Result { + // TODO: fetch editgroup, check if this entity is already being updated in it. If so, + // need to fetch that revision, do the edit, parse that synatx is good, then delete the + // existing edit and update with the new one. + let original_entity = specifier.get_from_api(api_client, None, None)?; + let exact_specifier = original_entity.specifier(); + let tmp_file = tempfile::Builder::new() + .suffix(if json { ".json" } else { ".toml" }) + .tempfile()?; + if json { + writeln!(&tmp_file, "{}", original_entity.to_json_string()?)? + } else { + writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)? + } + let mut editor_cmd = std::process::Command::new(&editing_command) + .arg(tmp_file.path()) + .spawn() + .expect("failed to execute process"); + let cmd_status = editor_cmd.wait()?; + if !cmd_status.success() { + return Err(anyhow!( + "editor ({}) exited with non-success status code ({}), bailing on edit", + editing_command, + cmd_status + .code() + .map(|v| v.to_string()) + .unwrap_or_else(|| "N/A".to_string()) + )); + }; + let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?; + // for whatever reason api_client's TCP connection is broken after spawning, so try a + // dummy call, expected to fail, but connection should re-establish after this + specifier + .get_from_api(api_client, None, None) + .context("re-fetch") + .ok(); + let ee = api_client + .update_entity_from_json(exact_specifier, &json_str, editgroup_id) + .context("updating after edit")?; + Ok(ee) +} diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs index 0fcf370..5500a7a 100644 --- a/rust/fatcat-cli/src/download.rs +++ b/rust/fatcat-cli/src/download.rs @@ -1,17 +1,21 @@ use anyhow::{anyhow, Context, Result}; -use fatcat_openapi::models::FileEntity; +use fatcat_openapi::models::{FileEntity, ReleaseEntity}; use indicatif::ProgressBar; use reqwest::header::USER_AGENT; use std::fs::File; use url::Url; +use std::path::Path; + #[derive(Debug, PartialEq, Clone)] pub enum DownloadStatus { Exists(String), Downloaded(String), NetworkError(String), - NoAccess, - NotYet, + NoPublicAccess, + FileMissingMetadata, + WrongSize, + WrongHash, } // eg, https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf @@ -30,10 +34,42 @@ fn rewrite_wayback_url(url: Url) -> Result { /// Attempts to download a file entity, including verifying checksum. pub fn download_file(fe: FileEntity) -> Result { - // TODO: check if file has sha1hex - // TODO: check if file already exists + let sha1hex = match fe.sha1 { + Some(v) => v, + None => return Ok(DownloadStatus::FileMissingMetadata), + }; + let expected_size = match fe.size { + Some(v) => v as u64, + None => return Ok(DownloadStatus::FileMissingMetadata), + }; + + let file_suffix = match fe.mimetype.as_ref().map(String::as_str) { + Some("application/pdf") => ".pdf", + Some("application/postscript") => ".pdf", + Some("text/html") => ".html", + Some("text/xml") => ".xml", + _ => "", + }; + + // TODO: output directory + let path_string = format!("{}{}", sha1hex, file_suffix); + let final_path = Path::new(&path_string); + + if final_path.exists() { + return Ok(DownloadStatus::Exists(final_path.to_string_lossy().to_string())); + }; + + let path_string = format!("{}{}.partial", sha1hex, file_suffix); + let download_path = Path::new(&path_string); - // TODO: only archive.org URLs + let raw_url = match fe.urls.as_ref() { + None => return Ok(DownloadStatus::NoPublicAccess), + Some(url_list) if url_list.len() == 0 => return Ok(DownloadStatus::NoPublicAccess), + // TODO: remove clone (?) + // TODO: better heuristic than "just try first URL" + Some(url_list) => url_list[0].url.clone(), + }; + // TODO: only archive.org URLs (?) let raw_url = fe.urls.unwrap()[0].url.clone(); let mut url = Url::parse(&raw_url)?; @@ -42,7 +78,7 @@ pub fn download_file(fe: FileEntity) -> Result { } // TODO: open temporary file (real file plus suffix?) - let out_file = File::create(format!("{}.pdf", fe.sha1.unwrap()))?; + let download_file = File::create(download_path)?; println!("downloading: {}", url); let client = reqwest::blocking::Client::new(); @@ -60,7 +96,28 @@ pub fn download_file(fe: FileEntity) -> Result { // TODO: what if no filesize? // TODO: compare with resp.content_length(() -> Option let pb = ProgressBar::new(fe.size.unwrap() as u64); - let _out_size = resp.copy_to(&mut pb.wrap_write(out_file))?; + let out_size = resp.copy_to(&mut pb.wrap_write(download_file))?; - Ok(DownloadStatus::NotYet) + if out_size != expected_size { + // TODO: delete partial file? + return Ok(DownloadStatus::WrongSize); + } + + Ok(DownloadStatus::Downloaded(final_path.to_string_lossy().to_string())) +} + +pub fn download_release(re: ReleaseEntity) -> Result { + let file_entities = match re.files { + None => return Err(anyhow!("expected file sub-entities to be 'expanded' on release")), + Some(list) => list, + }; + let mut status = DownloadStatus::NoPublicAccess; + for fe in file_entities { + status = download_file(fe)?; + match status { + DownloadStatus::Exists(_) | DownloadStatus::Downloaded(_) => break, + _ => (), + }; + } + Ok(status) } diff --git a/rust/fatcat-cli/src/lib.rs b/rust/fatcat-cli/src/lib.rs index 8a48a3b..206fd09 100644 --- a/rust/fatcat-cli/src/lib.rs +++ b/rust/fatcat-cli/src/lib.rs @@ -12,9 +12,9 @@ mod specifier; pub use api::FatcatApiClient; pub use commands::{ - print_changelog_entries, print_editgroups, print_entity_histories, ClientStatus, + print_changelog_entries, print_editgroups, print_entity_histories, ClientStatus, edit_entity_locally, }; -pub use download::download_file; +pub use download::{download_release, download_file}; pub use entities::{read_entity_file, ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation}; pub use search::crude_search; pub use specifier::Specifier; diff --git a/rust/fatcat-cli/src/main.rs b/rust/fatcat-cli/src/main.rs index b677aca..79efbd3 100644 --- a/rust/fatcat-cli/src/main.rs +++ b/rust/fatcat-cli/src/main.rs @@ -398,44 +398,7 @@ fn run(opt: Opt) -> Result<()> { toml: _, editing_command, } => { - // TODO: fetch editgroup, check if this entity is already being updated in it. If so, - // need to fetch that revision, do the edit, parse that synatx is good, then delete the - // existing edit and update with the new one. - let original_entity = specifier.get_from_api(&mut api_client, None, None)?; - let exact_specifier = original_entity.specifier(); - let tmp_file = tempfile::Builder::new() - .suffix(if json { ".json" } else { ".toml" }) - .tempfile()?; - if json { - writeln!(&tmp_file, "{}", original_entity.to_json_string()?)? - } else { - writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)? - } - let mut editor_cmd = std::process::Command::new(&editing_command) - .arg(tmp_file.path()) - .spawn() - .expect("failed to execute process"); - let cmd_status = editor_cmd.wait()?; - if !cmd_status.success() { - return Err(anyhow!( - "editor ({}) exited with non-success status code ({}), bailing on edit", - editing_command, - cmd_status - .code() - .map(|v| v.to_string()) - .unwrap_or_else(|| "N/A".to_string()) - )); - }; - let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?; - // for whatever reason api_client's TCP connection is broken after spawning, so try a - // dummy call, expected to fail, but connection should re-establish after this - specifier - .get_from_api(&mut api_client, None, None) - .context("re-fetch") - .ok(); - let ee = api_client - .update_entity_from_json(exact_specifier, &json_str, editgroup_id) - .context("updating after edit")?; + let ee = edit_entity_locally(&mut api_client, specifier, editgroup_id, json, editing_command)?; println!("{}", serde_json::to_string(&ee)?); } Command::Changelog { @@ -514,10 +477,9 @@ fn run(opt: Opt) -> Result<()> { // run lookups if necessary (inefficient) let specifier = match specifier { Specifier::ReleaseLookup(_, _) | Specifier::FileLookup(_, _) => specifier.into_entity_specifier(&mut api_client)?, - // XXX: _ => specifier, }; - let file_entities = match specifier { + let status = match specifier { Specifier::Release(ident) => { let result = api_client.rt.block_on( api_client.api.get_release(ident.clone(), Some("files".to_string()), Some("abstracts,refs".to_string())) @@ -529,8 +491,7 @@ fn run(opt: Opt) -> Result<()> { resp => Err(anyhow!("{:?}", resp)) .with_context(|| format!("API GET failed: {:?}", ident)), }?; - // TODO: not unwrap - release_entity.files.unwrap() + download_release(release_entity) }, Specifier::File(ident) => { let result = api_client.rt.block_on( @@ -543,15 +504,11 @@ fn run(opt: Opt) -> Result<()> { resp => Err(anyhow!("{:?}", resp)) .with_context(|| format!("API GET failed: {:?}", ident)), }?; - vec![file_entity] + download_file(file_entity) }, - // TODO: not panic - _ => panic!("TODO: can only fetch file or release"), - }; - for fe in file_entities { - let status = download_file(fe)?; - println!("{:?}", status); - }; + other => Err(anyhow!("Don't know how to download: {:?}", other)), + }?; + println!("{:?}", status); } Command::Search { entity_type, -- cgit v1.2.3