aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-02-04 23:22:11 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2021-02-04 23:22:11 -0800
commit: 19535c6c6ebcd671ad9582d6502734320618876a (patch)
tree: 2056af48624eb71f546d96bfeeae5cbc1dc790fa
parent: 6b20f4a2eb8134b35d63cfdb9eed71c76bce6148 (diff)
download: fatcat-cli-19535c6c6ebcd671ad9582d6502734320618876a.tar.gz
fatcat-cli-19535c6c6ebcd671ad9582d6502734320618876a.zip
improve download routines; move 'edit' to commands.rs
-rw-r--r-- rust/fatcat-cli/src/commands.rs 46
-rw-r--r-- rust/fatcat-cli/src/download.rs 75
-rw-r--r-- rust/fatcat-cli/src/lib.rs 4
-rw-r--r-- rust/fatcat-cli/src/main.rs 57
4 files changed, 119 insertions, 63 deletions
diff --git a/rust/fatcat-cli/src/commands.rs b/rust/fatcat-cli/src/commands.rs
index c0000c7..30fa0c4 100644
--- a/rust/fatcat-cli/src/commands.rs
+++ b/rust/fatcat-cli/src/commands.rs
@@ -9,8 +9,8 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use crate::api::FatcatApiClient;
//use crate::download::download_file;
-use crate::entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation};
-//use crate::specifier::Specifier;
+use crate::entities::{ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation, read_entity_file};
+use crate::specifier::Specifier;
// Want to show:
// - whether api_token found
@@ -270,3 +270,45 @@ pub fn print_entity_histories(
}
Ok(())
}
+
+/// Opens an entity in an external editor and submits the edited result as an update.
+///
+/// The entity named by `specifier` is fetched from the API and serialized into a
+/// temporary file (JSON when `json` is true, TOML otherwise). `editing_command` is then
+/// run with that file's path as its only argument. Once the editor exits successfully,
+/// the file is read back and sent as an update within the editgroup `editgroup_id`.
+///
+/// # Errors
+///
+/// Returns an error if the initial fetch fails, the temporary file cannot be created or
+/// written, the editor cannot be started or exits with a non-success status, the edited
+/// file cannot be read back, or the update call fails.
+pub fn edit_entity_locally(
+    api_client: &mut FatcatApiClient,
+    specifier: Specifier,
+    editgroup_id: String,
+    json: bool,
+    editing_command: String,
+) -> Result<models::EntityEdit> {
+    // TODO: fetch editgroup, check if this entity is already being updated in it. If so,
+    // need to fetch that revision, do the edit, check that the syntax is good, then delete
+    // the existing edit and update with the new one.
+    let original_entity = specifier.get_from_api(api_client, None, None)?;
+    let exact_specifier = original_entity.specifier();
+    // The temporary file's suffix matches the serialization format written into it.
+    let tmp_file = tempfile::Builder::new()
+        .suffix(if json { ".json" } else { ".toml" })
+        .tempfile()?;
+    if json {
+        writeln!(&tmp_file, "{}", original_entity.to_json_string()?)?
+    } else {
+        writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)?
+    }
+    // A missing or non-executable editor is a problem with the user's environment, not a
+    // bug in this program: report it as an error instead of panicking.
+    let mut editor_cmd = std::process::Command::new(&editing_command)
+        .arg(tmp_file.path())
+        .spawn()
+        .with_context(|| format!("failed to execute editor process: {}", editing_command))?;
+    let cmd_status = editor_cmd.wait()?;
+    if !cmd_status.success() {
+        return Err(anyhow!(
+            "editor ({}) exited with non-success status code ({}), bailing on edit",
+            editing_command,
+            cmd_status
+                .code()
+                .map(|v| v.to_string())
+                .unwrap_or_else(|| "N/A".to_string())
+        ));
+    }
+    let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?;
+    // for whatever reason api_client's TCP connection is broken after spawning, so try a
+    // dummy call, expected to fail, but connection should re-establish after this
+    specifier
+        .get_from_api(api_client, None, None)
+        .context("re-fetch")
+        .ok();
+    let ee = api_client
+        .update_entity_from_json(exact_specifier, &json_str, editgroup_id)
+        .context("updating after edit")?;
+    Ok(ee)
+}
diff --git a/rust/fatcat-cli/src/download.rs b/rust/fatcat-cli/src/download.rs
index 0fcf370..5500a7a 100644
--- a/rust/fatcat-cli/src/download.rs
+++ b/rust/fatcat-cli/src/download.rs
@@ -1,17 +1,21 @@
use anyhow::{anyhow, Context, Result};
-use fatcat_openapi::models::FileEntity;
+use fatcat_openapi::models::{FileEntity, ReleaseEntity};
use indicatif::ProgressBar;
use reqwest::header::USER_AGENT;
use std::fs::File;
use url::Url;
+use std::path::Path;
+
#[derive(Debug, PartialEq, Clone)]
pub enum DownloadStatus { // Outcome reported by download_file / download_release.
Exists(String), // Target file was already present on disk; carries its path.
Downloaded(String), // Download completed; carries the final output path.
NetworkError(String), // NOTE(review): no producer is visible in this diff; presumably carries an error message. Confirm.
- NoAccess,
- NotYet,
+ NoPublicAccess, // File entity has no URL list, or the list is empty; also the starting status in download_release.
+ FileMissingMetadata, // File entity lacks a sha1 or a size, so the download is not attempted.
+ WrongSize, // Number of bytes written differs from the size recorded on the file entity.
+ WrongHash, // NOTE(review): not returned by any code visible in this diff; presumably a checksum mismatch. Confirm.
}
// eg, https://web.archive.org/web/20140802044207/http://www.geo.coop:80/sites/default/files/labs_of_oligarchy.pdf
@@ -30,10 +34,42 @@ fn rewrite_wayback_url(url: Url) -> Result<Url> {
/// Attempts to download a file entity, including verifying checksum.
pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {
- // TODO: check if file has sha1hex
- // TODO: check if file already exists
+ let sha1hex = match fe.sha1 {
+ Some(v) => v,
+ None => return Ok(DownloadStatus::FileMissingMetadata),
+ };
+ let expected_size = match fe.size {
+ Some(v) => v as u64,
+ None => return Ok(DownloadStatus::FileMissingMetadata),
+ };
+
+ let file_suffix = match fe.mimetype.as_ref().map(String::as_str) {
+ Some("application/pdf") => ".pdf",
+ Some("application/postscript") => ".pdf",
+ Some("text/html") => ".html",
+ Some("text/xml") => ".xml",
+ _ => "",
+ };
+
+ // TODO: output directory
+ let path_string = format!("{}{}", sha1hex, file_suffix);
+ let final_path = Path::new(&path_string);
+
+ if final_path.exists() {
+ return Ok(DownloadStatus::Exists(final_path.to_string_lossy().to_string()));
+ };
+
+ let path_string = format!("{}{}.partial", sha1hex, file_suffix);
+ let download_path = Path::new(&path_string);
- // TODO: only archive.org URLs
+ let raw_url = match fe.urls.as_ref() {
+ None => return Ok(DownloadStatus::NoPublicAccess),
+ Some(url_list) if url_list.len() == 0 => return Ok(DownloadStatus::NoPublicAccess),
+ // TODO: remove clone (?)
+ // TODO: better heuristic than "just try first URL"
+ Some(url_list) => url_list[0].url.clone(),
+ };
+ // TODO: only archive.org URLs (?)
let raw_url = fe.urls.unwrap()[0].url.clone();
let mut url = Url::parse(&raw_url)?;
@@ -42,7 +78,7 @@ pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {
}
// TODO: open temporary file (real file plus suffix?)
- let out_file = File::create(format!("{}.pdf", fe.sha1.unwrap()))?;
+ let download_file = File::create(download_path)?;
println!("downloading: {}", url);
let client = reqwest::blocking::Client::new();
@@ -60,7 +96,28 @@ pub fn download_file(fe: FileEntity) -> Result<DownloadStatus> {
// TODO: what if no filesize?
// TODO: compare with resp.content_length(() -> Option<u64>
let pb = ProgressBar::new(fe.size.unwrap() as u64);
- let _out_size = resp.copy_to(&mut pb.wrap_write(out_file))?;
+ let out_size = resp.copy_to(&mut pb.wrap_write(download_file))?;
- Ok(DownloadStatus::NotYet)
+ if out_size != expected_size {
+ // TODO: delete partial file?
+ return Ok(DownloadStatus::WrongSize);
+ }
+
+ Ok(DownloadStatus::Downloaded(final_path.to_string_lossy().to_string()))
+}
+
+/// Attempts to download one file for a release entity.
+///
+/// The release's file entities are tried in order with `download_file`, stopping at the
+/// first one whose status is `Exists` or `Downloaded`. If none reaches that state, the
+/// status of the last attempt is returned; a release with an empty file list yields
+/// `NoPublicAccess`.
+///
+/// # Errors
+///
+/// Returns an error if the release carries no `files` field (the sub-entities were not
+/// expanded), or if any `download_file` call itself returns an error.
+pub fn download_release(re: ReleaseEntity) -> Result<DownloadStatus> {
+    let candidates = re
+        .files
+        .ok_or_else(|| anyhow!("expected file sub-entities to be 'expanded' on release"))?;
+    // Reported unchanged when there are no file entities to try.
+    let mut outcome = DownloadStatus::NoPublicAccess;
+    for candidate in candidates {
+        outcome = download_file(candidate)?;
+        // One file on disk is enough; do not try the remaining entities.
+        if matches!(
+            outcome,
+            DownloadStatus::Exists(_) | DownloadStatus::Downloaded(_)
+        ) {
+            break;
+        }
+    }
+    Ok(outcome)
}
diff --git a/rust/fatcat-cli/src/lib.rs b/rust/fatcat-cli/src/lib.rs
index 8a48a3b..206fd09 100644
--- a/rust/fatcat-cli/src/lib.rs
+++ b/rust/fatcat-cli/src/lib.rs
@@ -12,9 +12,9 @@ mod specifier;
pub use api::FatcatApiClient;
pub use commands::{
- print_changelog_entries, print_editgroups, print_entity_histories, ClientStatus,
+ print_changelog_entries, print_editgroups, print_entity_histories, ClientStatus, edit_entity_locally,
};
-pub use download::download_file;
+pub use download::{download_release, download_file};
pub use entities::{read_entity_file, ApiEntityModel, ApiModelIdent, ApiModelSer, Mutation};
pub use search::crude_search;
pub use specifier::Specifier;
diff --git a/rust/fatcat-cli/src/main.rs b/rust/fatcat-cli/src/main.rs
index b677aca..79efbd3 100644
--- a/rust/fatcat-cli/src/main.rs
+++ b/rust/fatcat-cli/src/main.rs
@@ -398,44 +398,7 @@ fn run(opt: Opt) -> Result<()> {
toml: _,
editing_command,
} => {
- // TODO: fetch editgroup, check if this entity is already being updated in it. If so,
- // need to fetch that revision, do the edit, parse that synatx is good, then delete the
- // existing edit and update with the new one.
- let original_entity = specifier.get_from_api(&mut api_client, None, None)?;
- let exact_specifier = original_entity.specifier();
- let tmp_file = tempfile::Builder::new()
- .suffix(if json { ".json" } else { ".toml" })
- .tempfile()?;
- if json {
- writeln!(&tmp_file, "{}", original_entity.to_json_string()?)?
- } else {
- writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)?
- }
- let mut editor_cmd = std::process::Command::new(&editing_command)
- .arg(tmp_file.path())
- .spawn()
- .expect("failed to execute process");
- let cmd_status = editor_cmd.wait()?;
- if !cmd_status.success() {
- return Err(anyhow!(
- "editor ({}) exited with non-success status code ({}), bailing on edit",
- editing_command,
- cmd_status
- .code()
- .map(|v| v.to_string())
- .unwrap_or_else(|| "N/A".to_string())
- ));
- };
- let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?;
- // for whatever reason api_client's TCP connection is broken after spawning, so try a
- // dummy call, expected to fail, but connection should re-establish after this
- specifier
- .get_from_api(&mut api_client, None, None)
- .context("re-fetch")
- .ok();
- let ee = api_client
- .update_entity_from_json(exact_specifier, &json_str, editgroup_id)
- .context("updating after edit")?;
+ let ee = edit_entity_locally(&mut api_client, specifier, editgroup_id, json, editing_command)?;
println!("{}", serde_json::to_string(&ee)?);
}
Command::Changelog {
@@ -514,10 +477,9 @@ fn run(opt: Opt) -> Result<()> {
// run lookups if necessary (inefficient)
let specifier = match specifier {
Specifier::ReleaseLookup(_, _) | Specifier::FileLookup(_, _) => specifier.into_entity_specifier(&mut api_client)?,
- // XXX:
_ => specifier,
};
- let file_entities = match specifier {
+ let status = match specifier {
Specifier::Release(ident) => {
let result = api_client.rt.block_on(
api_client.api.get_release(ident.clone(), Some("files".to_string()), Some("abstracts,refs".to_string()))
@@ -529,8 +491,7 @@ fn run(opt: Opt) -> Result<()> {
resp => Err(anyhow!("{:?}", resp))
.with_context(|| format!("API GET failed: {:?}", ident)),
}?;
- // TODO: not unwrap
- release_entity.files.unwrap()
+ download_release(release_entity)
},
Specifier::File(ident) => {
let result = api_client.rt.block_on(
@@ -543,15 +504,11 @@ fn run(opt: Opt) -> Result<()> {
resp => Err(anyhow!("{:?}", resp))
.with_context(|| format!("API GET failed: {:?}", ident)),
}?;
- vec![file_entity]
+ download_file(file_entity)
},
- // TODO: not panic
- _ => panic!("TODO: can only fetch file or release"),
- };
- for fe in file_entities {
- let status = download_file(fe)?;
- println!("{:?}", status);
- };
+ other => Err(anyhow!("Don't know how to download: {:?}", other)),
+ }?;
+ println!("{:?}", status);
}
Command::Search {
entity_type,