diff options
| -rw-r--r-- | rust/fatcat-cli/src/commands.rs | 46 | ||||
| -rw-r--r-- | rust/fatcat-cli/src/download.rs | 75 | ||||
| -rw-r--r-- | rust/fatcat-cli/src/lib.rs | 4 | ||||
| -rw-r--r-- | rust/fatcat-cli/src/main.rs | 57 | 
4 files changed, 119 insertions, 63 deletions
| diff --git a/rust/fatcat-cli/src/commands.rs b/rust/fatcat-cli/src/commands.rs index c0000c7..30fa0c4 100644 --- a/rust/fatcat-cli/src/commands.rs +++ b/rust/fatcat-cli/src/commands.rs @@ -9,8 +9,8 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};  use crate::api::FatcatApiClient;  //use crate::download::download_file; -use crate::entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation}; -//use crate::specifier::Specifier; +use crate::entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation, read_entity_file}; +use crate::specifier::Specifier;  // Want to show:  // - whether api_token found @@ -270,3 +270,45 @@ pub fn print_entity_histories(      }      Ok(())  } + +pub fn edit_entity_locally(api_client: &mut FatcatApiClient,  specifier: Specifier, editgroup_id: String, json: bool, editing_command: String) -> Result<models::EntityEdit> { +    // TODO: fetch editgroup, check if this entity is already being updated in it. If so, +    // need to fetch that revision, do the edit, parse that synatx is good, then delete the +    // existing edit and update with the new one. +    let original_entity = specifier.get_from_api(api_client, None, None)?; +    let exact_specifier = original_entity.specifier(); +    let tmp_file = tempfile::Builder::new() +        .suffix(if json { ".json" } else { ".toml" }) +        .tempfile()?; +    if json { +        writeln!(&tmp_file, "{}", original_entity.to_json_string()?)? +    } else { +        writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)? +    } +    let mut editor_cmd = std::process::Command::new(&editing_command) +        .arg(tmp_file.path()) +        .spawn() +        .expect("failed to execute process"); +    let cmd_status = editor_cmd.wait()?; +    if !cmd_status.success() { +        return Err(anyhow!( +            "editor ({}) exited with non-success status code ({}), bailing on edit", +            editing_command, +            cmd_status +                .code() +                .map(|v| v.to_string()) +                .unwrap_or_else(|| "N/A".to_string()) +        )); +    }; +    let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?; +    // for whatever reason api_client's TCP connection is broken after spawning, so try a +    // dummy call, expected to fail, but connection should re-establish after this +    specifier +        .get_from_api(api_client, None, None) +        .context("re-fetch") +        .ok(); +    let ee = api_client +        .update_entity_from_json(exact_specifier, &json_str, editgroup_id) +        .context("updating after edit")?; +    Ok(ee) +} diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs index 0fcf370..5500a7a 100644 --- a/rust/fatcat-cli/src/download.rs +++ b/rust/fatcat-cli/src/download.rs @@ -1,17 +1,21 @@  use anyhow::{anyhow, Context, Result}; -use fatcat_openapi::models::FileEntity; +use fatcat_openapi::models::{FileEntity, ReleaseEntity};  use indicatif::ProgressBar;  use reqwest::header::USER_AGENT;  use std::fs::File;  use url::Url; +use std::path::Path; +  #[derive(Debug, PartialEq, Clone)]  pub enum DownloadStatus {      Exists(String),      Downloaded(String),      NetworkError(String), -    NoAccess, -    NotYet, +    NoPublicAccess, +    FileMissingMetadata, +    WrongSize, +    WrongHash,  }  // eg, https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf @@ -30,10 +34,42 @@ fn rewrite_wayback_url(url: Url) -> Result<Url> {  /// Attempts to download a file entity, including verifying checksum.  pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> { -    // TODO: check if file has sha1hex -    // TODO: check if file already exists +    let sha1hex = match fe.sha1 { +        Some(v) => v, +        None => return Ok(DownloadStatus::FileMissingMetadata), +    }; +    let expected_size = match fe.size { +        Some(v) => v as u64, +        None => return Ok(DownloadStatus::FileMissingMetadata), +    }; + +    let file_suffix = match fe.mimetype.as_ref().map(String::as_str) { +        Some("application/pdf") => ".pdf", +        Some("application/postscript") => ".pdf", +        Some("text/html") => ".html", +        Some("text/xml") => ".xml", +        _ => "", +    }; + +    // TODO: output directory +    let path_string = format!("{}{}", sha1hex, file_suffix); +    let final_path = Path::new(&path_string); + +    if final_path.exists() { +        return Ok(DownloadStatus::Exists(final_path.to_string_lossy().to_string())); +    }; + +    let path_string = format!("{}{}.partial", sha1hex, file_suffix); +    let download_path = Path::new(&path_string); -    // TODO: only archive.org URLs +    let raw_url = match fe.urls.as_ref() { +        None => return Ok(DownloadStatus::NoPublicAccess), +        Some(url_list) if url_list.len() == 0 => return Ok(DownloadStatus::NoPublicAccess), +        // TODO: remove clone (?) +        // TODO: better heuristic than "just try first URL" +        Some(url_list) => url_list[0].url.clone(), +    }; +    // TODO: only archive.org URLs (?)      let raw_url = fe.urls.unwrap()[0].url.clone();      let mut url = Url::parse(&raw_url)?; @@ -42,7 +78,7 @@ pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {      }      // TODO: open temporary file (real file plus suffix?) -    let out_file = File::create(format!("{}.pdf", fe.sha1.unwrap()))?; +    let download_file = File::create(download_path)?;      println!("downloading: {}", url);      let client = reqwest::blocking::Client::new(); @@ -60,7 +96,28 @@ pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {      // TODO: what if no filesize?      // TODO: compare with resp.content_length(() -> Option<u64>      let pb = ProgressBar::new(fe.size.unwrap() as u64); -    let _out_size = resp.copy_to(&mut pb.wrap_write(out_file))?; +    let out_size = resp.copy_to(&mut pb.wrap_write(download_file))?; -    Ok(DownloadStatus::NotYet) +    if out_size != expected_size { +        // TODO: delete partial file? +        return Ok(DownloadStatus::WrongSize); +    } + +    Ok(DownloadStatus::Downloaded(final_path.to_string_lossy().to_string())) +} + +pub fn download_release(re: ReleaseEntity) -> Result<DownloadStatus> { +    let file_entities = match re.files { +        None => return Err(anyhow!("expected file sub-entities to be 'expanded' on release")), +        Some(list) => list, +    }; +    let mut status = DownloadStatus::NoPublicAccess; +    for fe in file_entities { +        status = download_file(fe)?; +        match status { +            DownloadStatus::Exists(_) | DownloadStatus::Downloaded(_) => break, +            _ => (), +        }; +    } +    Ok(status)  } diff --git a/rust/fatcat-cli/src/lib.rs b/rust/fatcat-cli/src/lib.rs index 8a48a3b..206fd09 100644 --- a/rust/fatcat-cli/src/lib.rs +++ b/rust/fatcat-cli/src/lib.rs @@ -12,9 +12,9 @@ mod specifier;  pub use api::FatcatApiClient;  pub use commands::{ -    print_changelog_entries, print_editgroups, print_entity_histories, ClientStatus, +    print_changelog_entries, print_editgroups, print_entity_histories, ClientStatus, edit_entity_locally,  }; -pub use download::download_file; +pub use download::{download_release, download_file};  pub use entities::{read_entity_file, ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation};  pub use search::crude_search;  pub use specifier::Specifier; diff --git a/rust/fatcat-cli/src/main.rs b/rust/fatcat-cli/src/main.rs index b677aca..79efbd3 100644 --- a/rust/fatcat-cli/src/main.rs +++ b/rust/fatcat-cli/src/main.rs @@ -398,44 +398,7 @@ fn run(opt: Opt) -> Result<()> {              toml: _,              editing_command,          } => { -            // TODO: fetch editgroup, check if this entity is already being updated in it. If so, -            // need to fetch that revision, do the edit, parse that synatx is good, then delete the -            // existing edit and update with the new one. -            let original_entity = specifier.get_from_api(&mut api_client, None, None)?; -            let exact_specifier = original_entity.specifier(); -            let tmp_file = tempfile::Builder::new() -                .suffix(if json { ".json" } else { ".toml" }) -                .tempfile()?; -            if json { -                writeln!(&tmp_file, "{}", original_entity.to_json_string()?)? -            } else { -                writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)? -            } -            let mut editor_cmd = std::process::Command::new(&editing_command) -                .arg(tmp_file.path()) -                .spawn() -                .expect("failed to execute process"); -            let cmd_status = editor_cmd.wait()?; -            if !cmd_status.success() { -                return Err(anyhow!( -                    "editor ({}) exited with non-success status code ({}), bailing on edit", -                    editing_command, -                    cmd_status -                        .code() -                        .map(|v| v.to_string()) -                        .unwrap_or_else(|| "N/A".to_string()) -                )); -            }; -            let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?; -            // for whatever reason api_client's TCP connection is broken after spawning, so try a -            // dummy call, expected to fail, but connection should re-establish after this -            specifier -                .get_from_api(&mut api_client, None, None) -                .context("re-fetch") -                .ok(); -            let ee = api_client -                .update_entity_from_json(exact_specifier, &json_str, editgroup_id) -                .context("updating after edit")?; +            let ee = edit_entity_locally(&mut api_client, specifier, editgroup_id, json, editing_command)?;              println!("{}", serde_json::to_string(&ee)?);          }          Command::Changelog { @@ -514,10 +477,9 @@ fn run(opt: Opt) -> Result<()> {              // run lookups if necessary (inefficient)              let specifier = match specifier {                  Specifier::ReleaseLookup(_, _) | Specifier::FileLookup(_, _) => specifier.into_entity_specifier(&mut api_client)?, -                // XXX:                  _ => specifier,              }; -            let file_entities = match specifier { +            let status = match specifier {                  Specifier::Release(ident) => {                      let result = api_client.rt.block_on(                          api_client.api.get_release(ident.clone(), Some("files".to_string()), Some("abstracts,refs".to_string())) @@ -529,8 +491,7 @@ fn run(opt: Opt) -> Result<()> {                          resp => Err(anyhow!("{:?}", resp))                              .with_context(|| format!("API GET failed: {:?}", ident)),                      }?; -                    // TODO: not unwrap -                    release_entity.files.unwrap() +                    download_release(release_entity)                  },                  Specifier::File(ident) => {                      let result = api_client.rt.block_on( @@ -543,15 +504,11 @@ fn run(opt: Opt) -> Result<()> {                          resp => Err(anyhow!("{:?}", resp))                              .with_context(|| format!("API GET failed: {:?}", ident)),                      }?; -                    vec![file_entity] +                    download_file(file_entity)                  }, -                // TODO: not panic -                _ => panic!("TODO: can only fetch file or release"), -            }; -            for fe in file_entities { -                let status = download_file(fe)?; -                println!("{:?}", status); -            }; +                other => Err(anyhow!("Don't know how to download: {:?}", other)), +            }?; +            println!("{:?}", status);          }          Command::Search {              entity_type, | 
