diff options
Diffstat (limited to 'fatcat-cli')
-rw-r--r-- | fatcat-cli/Cargo.toml | 55 | ||||
-rw-r--r-- | fatcat-cli/README.md | 2 | ||||
-rw-r--r-- | fatcat-cli/binary_size.md | 88 | ||||
-rw-r--r-- | fatcat-cli/fatcat-cli.1 | 64 | ||||
-rw-r--r-- | fatcat-cli/fatcat-cli.1.scdoc | 48 | ||||
-rw-r--r-- | fatcat-cli/plan.txt | 146 | ||||
-rw-r--r-- | fatcat-cli/src/api.rs | 440 | ||||
-rw-r--r-- | fatcat-cli/src/commands.rs | 491 | ||||
-rw-r--r-- | fatcat-cli/src/download.rs | 261 | ||||
-rw-r--r-- | fatcat-cli/src/entities.rs | 411 | ||||
-rw-r--r-- | fatcat-cli/src/lib.rs | 93 | ||||
-rw-r--r-- | fatcat-cli/src/main.rs | 631 | ||||
-rw-r--r-- | fatcat-cli/src/search.rs | 186 | ||||
-rw-r--r-- | fatcat-cli/src/specifier.rs | 584 |
14 files changed, 3500 insertions, 0 deletions
diff --git a/fatcat-cli/Cargo.toml b/fatcat-cli/Cargo.toml new file mode 100644 index 0000000..dbb114a --- /dev/null +++ b/fatcat-cli/Cargo.toml @@ -0,0 +1,55 @@ +[package] +name = "fatcat-cli" +version = "0.3.0-dev" +edition = "2018" +authors = ["Bryan Newbold <bnewbold@robocracy.org>"] +license = "AGPL-3+" +#license-file = "../../LICENSE.AGPLv3.txt" +description = "CLI tool for fatcat.wiki, an open digital catalog of research papers" +readme = "fatcat-cli/README.md" +homepage = "https://fatcat.wiki" +repository = "https://github.com/internetarchive/fatcat" +keywords = ["cli", "fatcat", "digital-library"] +categories = ["command-line-utilities"] + + +[dependencies] +data-encoding = "2.1" +fatcat-openapi = { version = "*", path = "../fatcat-openapi", default-features = false, features = ["client"] } +macaroon = { git = "https://github.com/bnewbold/libmacaroon-rs", branch = "bnewbold-broken" } +toml = "0.5" +termcolor = "1" +atty = "0.2" +tabwriter = "1.2" +#human-panic = "1" +structopt = "0.3" +swagger = "5.0.2" +hyper = "0.13" +tokio = { version = "0.2", features = ["rt-threaded", "macros", "stream"] } +serde_json = "1.0" +anyhow = "1.0" +log = "0.4" +env_logger = "0.7" +regex = { version = "1.3", default-features = false, features = ["perf-literal"] } +lazy_static = "1" +serde = "1.0" +reqwest = { version = "0.10", features = ["blocking", "json"] } +chrono-humanize = "*" +tempfile = "3" +indicatif = "0.15" +url = "*" + + +[dev-dependencies] +assert_cmd = "1" + + +[package.metadata.deb] +maintainer = "Bryan Newbold <bnewbold@robocracy.org>" +depends = "$auto" +section = "utility" +priority = "optional" +assets = [ + ["target/release/fatcat-cli", "usr/bin/", "755"], + ["fatcat-cli.1", "usr/share/man/man1/", "644"], +] diff --git a/fatcat-cli/README.md b/fatcat-cli/README.md new file mode 100644 index 0000000..1652d61 --- /dev/null +++ b/fatcat-cli/README.md @@ -0,0 +1,2 @@ + +`fatcat-cli` README. 
diff --git a/fatcat-cli/binary_size.md b/fatcat-cli/binary_size.md new file mode 100644 index 0000000..a79cf9b --- /dev/null +++ b/fatcat-cli/binary_size.md @@ -0,0 +1,88 @@ + +## Binary Size + +As of 2020-05-24, in early development, the relative binary sizes are: + + 121 MB default debug build + 12 MB default release build + 8.2 MB release build w/ LTO + 6.6 MB release build w/ LTO, stripped + +After some small changes: + + 5.9 MB release build w/ LTO, size optimization, other flags + 4.1 MB release build w/ LTO, size optimization, other flags, stripped + +Replacing reqwest with minreq: + + 6.3 MB release build w/ LTO, size optimization, other flags + 4.1 MB release build w/ LTO, size optimization, other flags, stripped + + (so, not worth it, at least while using fatcat_openapi with hyper+tokio) + +Note that release builds with LTO take *quite* a long time (many minutes). We +probably don't want that to be the default for `fatcatd` builds. + + cargo bloat --release --crates + + File .text Size Crate + 12.2% 21.4% 1021.5KiB fatcat_cli + 7.1% 12.5% 596.7KiB fatcat_openapi + 6.3% 11.1% 529.6KiB reqwest + 6.2% 10.9% 518.5KiB std + 3.5% 6.1% 290.3KiB clap + 2.5% 4.3% 205.9KiB regex + 2.4% 4.2% 198.7KiB regex_syntax + 2.1% 3.6% 172.8KiB h2 + 1.9% 3.4% 162.7KiB hyper + 1.8% 3.1% 149.9KiB futures + 1.4% 2.4% 116.9KiB serde_json + 1.3% 2.3% 111.2KiB macaroon + 1.0% 1.8% 85.3KiB unicode_normalization + 0.7% 1.3% 62.4KiB http + 0.6% 1.0% 50.1KiB serde + 0.6% 1.0% 47.5KiB url + 0.5% 0.9% 41.9KiB [Unknown] + 0.4% 0.8% 36.5KiB tokio_reactor + 0.4% 0.7% 31.8KiB env_logger + 0.3% 0.6% 26.6KiB chrono + 3.4% 5.9% 283.3KiB And 57 more crates. Use -n N to show more. 
+ 57.2% 100.0% 4.7MiB .text section size, the file size is 8.2MiB + + + bnewbold@orithena$ cargo bloat --release + Finished release [optimized] target(s) in 0.27s + Analyzing target/release/fatcat-cli + + File .text Size Crate Name + 0.4% 1.0% 53.2KiB regex <regex::exec::ExecNoSync as regex::re_trait::RegularExpression>::capture... + 0.4% 0.8% 44.1KiB regex_syntax regex_syntax::ast::parse::ParserI<P>::parse_with_comments + 0.3% 0.7% 36.8KiB unicode_normalization unicode_normalization::tables::compatibility_fully_decomposed + 0.3% 0.6% 30.3KiB unicode_normalization unicode_normalization::tables::canonical_fully_decomposed + 0.2% 0.5% 25.2KiB data_encoding data_encoding::Encoding::decode_mut + 0.2% 0.5% 24.0KiB fatcat_openapi? <fatcat_openapi::models::_IMPL_DESERIALIZE_FOR_ReleaseEntity::<impl serd... + 0.2% 0.5% 23.5KiB clap clap::app::parser::Parser::get_matches_with + 0.2% 0.4% 21.7KiB clap clap::app::validator::Validator::validate + 0.2% 0.4% 20.6KiB http http::header::name::parse_hdr + 0.2% 0.4% 19.5KiB fatcat_cli fatcat_cli::Specifier::get_from_api + 0.1% 0.3% 16.4KiB fatcat_cli <&mut serde_json::de::Deserializer<R> as serde::de::Deserializer>::deser... + 0.1% 0.3% 16.4KiB fatcat_cli <&mut serde_json::de::Deserializer<R> as serde::de::Deserializer>::deser... + 0.1% 0.3% 16.2KiB fatcat_cli <&mut serde_json::de::Deserializer<R> as serde::de::Deserializer>::deser... + 0.1% 0.3% 16.1KiB fatcat_cli fatcat_cli::run + 0.1% 0.3% 15.2KiB fatcat_cli <&mut serde_json::de::Deserializer<R> as serde::de::Deserializer>::deser... + 0.1% 0.3% 14.3KiB serde_json? <&mut serde_json::de::Deserializer<R> as serde::de::Deserializer>::deser... + 0.1% 0.3% 14.2KiB fatcat_cli <&mut serde_json::de::Deserializer<R> as serde::de::Deserializer>::deser... 
+ 0.1% 0.3% 14.0KiB regex regex::exec::ExecBuilder::build + 0.1% 0.3% 13.8KiB unicode_normalization unicode_normalization::tables::composition_table + 0.1% 0.3% 13.6KiB fatcat_cli <&mut serde_json::de::Deserializer<R> as serde::de::Deserializer>::deser... + 38.6% 89.5% 4.5MiB And 13832 smaller methods. Use -n N to show more. + 43.1% 100.0% 5.1MiB .text section size, the file size is 11.8MiB + +Low hanging fruit includes: + +- reviewing features for reqwest, clap, regex, fatcat_openapi +- replace reqwest with something smaller +- use `ansi-term` (already part of clap) +- consider removing fancy clap features? meh +- look at graph; probably duplicate versions of things + diff --git a/fatcat-cli/fatcat-cli.1 b/fatcat-cli/fatcat-cli.1 new file mode 100644 index 0000000..0831773 --- /dev/null +++ b/fatcat-cli/fatcat-cli.1 @@ -0,0 +1,64 @@ +.\" Generated by scdoc 1.9.0 +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.nh +.ad l +.\" Begin generated content: +.TH "fatcat-cli" "1" "2020-06-14" "fatcat.wiki CLI Manual Page" +.P +.SH NAME +.P +fatcat-cli - something something +.P +.SH SYNOPSIS +.P +fatcat-cli [FLAGS] [OPTIONS] <SUBCOMMAND> +.P +Subcommands: +.P +.RS 4 +create +delete +edit +editgroup +get +help +search +status +update +.P +.RE +.SH DESCRIPTION +.P +.SH OPTIONS +.P +\fB-h, --help\fR +.RS 4 +Prints help information +.P +.RE +\fB-V, --version\fR +.RS 4 +Prints version information +.P +.RE +\fB-v, --verbose\fR +.RS 4 +Pass many times for more log output +By default, it'll only report errors. Passing `-v` one time also prints warnings, `-vv` enables info logging, `-vvv` debug, and `-vvvv` trace. 
+.P +.RE +\fB--api-host <api-host>\fR [env: FATCAT_API_HOST] [default: https://api.fatcat.wiki] +.P +\fB--api-token <api-token>\fR [env: FATCAT_API_AUTH_TOKEN] +.P +\fB--search-host <search-host>\fR [env: FATCAT_SEARCH_HOST] [default: https://search.fatcat.wiki] +.P +.SH EXAMPLES +.P +Some examples of usage will go here, like: +.P +.RS 4 +fatcat-cli get thing +.P +.RE diff --git a/fatcat-cli/fatcat-cli.1.scdoc b/fatcat-cli/fatcat-cli.1.scdoc new file mode 100644 index 0000000..6a176aa --- /dev/null +++ b/fatcat-cli/fatcat-cli.1.scdoc @@ -0,0 +1,48 @@ +fatcat-cli(1) "fatcat.wiki CLI Manual Page" + +# NAME + +fatcat-cli - something something + +# SYNOPSIS + +fatcat-cli [FLAGS] [OPTIONS] <SUBCOMMAND> + +Subcommands: + + create + delete + edit + editgroup + get + help + search + status + update + +# DESCRIPTION + +# OPTIONS + +*-h, --help* + Prints help information + +*-V, --version* + Prints version information + +*-v, --verbose* + Pass many times for more log output + By default, it'll only report errors. Passing `-v` one time also prints warnings, `-vv` enables info logging, `-vvv` debug, and `-vvvv` trace. 
+ +*--api-host <api-host>* [env: FATCAT_API_HOST] [default: https://api.fatcat.wiki] + +*--api-token <api-token>* [env: FATCAT_API_AUTH_TOKEN] + +*--search-host <search-host>* [env: FATCAT_SEARCH_HOST] [default: https://search.fatcat.wiki] + +# EXAMPLES + +Some examples of usage will go here, like: + + fatcat-cli get thing + diff --git a/fatcat-cli/plan.txt b/fatcat-cli/plan.txt new file mode 100644 index 0000000..651acac --- /dev/null +++ b/fatcat-cli/plan.txt @@ -0,0 +1,146 @@ + +x search release, query string, limit, dumping search doc JSON +x search release, query string, limit, fetching API for each +x search release, query string, scroll API, fetching API for each + +x handle stdout terminated + +x editgroup creation + => set agent +x editgroup accept +x editgroup submit +x editgroup list + +x release create from json/TOML, to an editgroup +x release delete, to an editgroup +x release update from full json/TOML to API +x release edit (using $EDITOR, temp file) + +x release update fields and submit to editgroup + => more fields, like 2-5 for all entity types +x expand/hide flags for get, search + +- search/update/etc containers (and files?) + +- polish and test so actually usable for release edits from search + x consider moving to new repo, with copy of fatcat-openapi-client + x manpage + x .deb generation + => write actual manpage (and, HTML output? ronn? pandoc?) + => write actual README + +- implement @-syntax for create/update + => TODO: what was the proposal here? + => some variant of @-syntax for stream of multiple updates/creations? + +- get revisions for all entity types + + +#### Milestones + +- ability (at all) to revise edits for a single entity in editgroup + => clobber existing edits on update + => edits: get entity in current edit state +- streaming updates from search, with either pipe (jq) or field mutations + => syntax/commands + => batching (syntax? subcommand?) 
+ => auto-accept mode +- download many PDFs from search query + => parallelism could be GNU/parallel + => don't clobber existing + +#### Editgroup Workflow + +- editgroup creation outputs just editgroup_id on stdout (unless output type selected), plus "success" to stderr +- parse editgroup specifier + => "auto": fetch from recent; default? + => "new": create + => editgroup_blah or blah +- implement "delete from editgroup" for updates, edit + => no updates with current setup + => fetch editgroup helper + => helper function that takes editgroup (model) and expanded specifier; deletes existing edit from editgroup if necessary + => skip this codepath for "new" and batch creation + +#### File Downloads + +- download single file: + => try archive.org files, then wayback, then original URLs + => download to current directory as {sha1hex}.pdf.partial, then atomic move on success +- optional directory structure: {dir}/{hex}/{hex}/{sha1hex}.pdf +- parallelism of downloads + +#### Backburner + +- -o/--output and -i/--input for format/schema selection (including 'es-json') +- search release, filters, scroll API, fetching API for each + => structopt parses: query, filter, anti-filter +- search release, filters, scroll API, fetching API for each, verifying revision and filters for each + +## Design Decisions + +- batch/multi behavior for mutations + => need some option to do auto-accept batches +- updates and create, from-file vs. args + => basically, could be any of specifier, input_file, mutations supplied on command-line + => could use httpie @file.blah syntax to load entire file + => "edit" as an option for reading single files from disk? 
meh + proposal: + create <type> + either reads a file from path/stdin, or has mutation args + optionally --new-editgroup + create-multi <type> + reads multiple JSON from file or stdin + optionally --auto-batch in chunks + optionally --new-editgroup + update <specifier> + takes a specifier + either reads a file from path/stdin, or has mutation args + update-multi <type> + reads multiple JSON from file or stdin + creates new editgroup? + edit <specifier> + delete <specifier> + delete-multi <type> + reads multiple entities from stdin + + --skip-check controls whether to do a GET and validate mutations + => eg, don't update if equal +- holding state about current editgroup + => env var, with helpful output to show how to export + => spawn sub-shell with FATCAT_EDITGROUP set + => state in a config file somewhere (user homedir?) + => "smart" select most recent fatcat-cli editgroup from editor's list +- release revision checking on updates + => could re-fetch and check rev and/or mutations against current before making edit +- delete edit from editgroup + +## Rust refactors + +In rust code, all entity responses could have trait object implementations, +which would transform to either returning the entity (trait object) or error. + +## API refactors + +Could significantly reduce number of response types and endpoints by making +many methods generic (same endpoint URL, but entity type as a keyword): + +- entity history +- delete +- get edit + +Should allow destructive updates in editgroups with "clobber" flag. In +implementation, could either delete first or on conflict do upsert. + +More consistent use of generic success/error? + +## Feature Ideas + +- changelog (table): under editgroup command? +- syntect coloring of output for stdout +- cross build for OS X? homebrew? 
+- shell (bash) completions from clap +- fcid/UUID helper +- history for all entity types + => pretty table, json optional +- "edit editgroup" as a text file, `git rebase -i` style diff --git a/fatcat-cli/src/api.rs b/fatcat-cli/src/api.rs new file mode 100644 index 0000000..2463aab --- /dev/null +++ b/fatcat-cli/src/api.rs @@ -0,0 +1,440 @@ +use crate::{parse_macaroon_editor_id, EntityType, Specifier}; +use anyhow::{anyhow, Context, Result}; +use fatcat_openapi::models; +use fatcat_openapi::{ApiNoContext, ContextWrapperExt}; +use swagger::{auth, AuthData, ContextBuilder, EmptyContext, Push, XSpanIdString}; + +type FatcatApiContextType = swagger::make_context_ty!( + ContextBuilder, + EmptyContext, + Option<AuthData>, + XSpanIdString +); + +pub struct FatcatApiClient { + pub api: Box<dyn ApiNoContext<FatcatApiContextType>>, + pub rt: tokio::runtime::Runtime, + pub api_token: Option<String>, + pub api_host: String, + pub editor_id: Option<String>, +} + +impl FatcatApiClient { + pub fn new(api_host: String, api_token: Option<String>) -> Result<Self> { + let auth_data = match api_token { + Some(ref token) => Some(AuthData::Bearer(auth::Bearer { + token: token.clone(), + })), + None => None, + }; + //info!("{:?}", auth_data); + let context: FatcatApiContextType = swagger::make_context!( + ContextBuilder, + EmptyContext, + auth_data, + XSpanIdString::default() + ); + + //let wrapped_client: swagger::ContextWrapper< + let client = fatcat_openapi::client::Client::try_new(&api_host) + .context("failed to create HTTP(S) client")?; + let wrapped_client = Box::new(client.with_context(context)); + let rt: tokio::runtime::Runtime = + tokio::runtime::Runtime::new().expect("create tokio runtime"); + + let editor_id = match api_token { + Some(ref token) => { + Some(parse_macaroon_editor_id(token).context("parse API auth token")?) 
+ } + None => None, + }; + + Ok(FatcatApiClient { + api: wrapped_client, + rt, + api_token, + editor_id, + api_host, + }) + } + + pub fn update_editgroup_submit( + &mut self, + editgroup_id: String, + submit: bool, + ) -> Result<models::Editgroup> { + let result = self + .rt + .block_on(self.api.get_editgroup(editgroup_id.clone())) + .context("fetch editgroups")?; + let eg = match result { + fatcat_openapi::GetEditgroupResponse::Found(eg) => eg, + other => { + return Err(anyhow!("{:?}", other)) + .with_context(|| format!("failed to fetch editgroup {}", editgroup_id)) + } + }; + let result = self + .rt + .block_on( + self.api + .update_editgroup(editgroup_id.clone(), eg, Some(submit)), + ) + .context("submit editgroup")?; + match result { + fatcat_openapi::UpdateEditgroupResponse::UpdatedEditgroup(eg) => Ok(eg), + other => Err(anyhow!("{:?}", other)) + .with_context(|| format!("failed to submit editgroup {}", editgroup_id)), + } + } + + pub fn delete_entity( + &mut self, + specifier: Specifier, + editgroup_id: String, + ) -> Result<models::EntityEdit> { + use Specifier::*; + let specifier = specifier.into_entity_specifier(self)?; + match specifier.clone() { + Release(fcid) => match self + .rt + .block_on(self.api.delete_release(editgroup_id, fcid))? + { + fatcat_openapi::DeleteReleaseResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Work(fcid) => match self.rt.block_on(self.api.delete_work(editgroup_id, fcid))? { + fatcat_openapi::DeleteWorkResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Container(fcid) => match self + .rt + .block_on(self.api.delete_container(editgroup_id, fcid))? + { + fatcat_openapi::DeleteContainerResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Creator(fcid) => match self + .rt + .block_on(self.api.delete_creator(editgroup_id, fcid))? 
+ { + fatcat_openapi::DeleteCreatorResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + File(fcid) => match self.rt.block_on(self.api.delete_file(editgroup_id, fcid))? { + fatcat_openapi::DeleteFileResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + FileSet(fcid) => match self + .rt + .block_on(self.api.delete_fileset(editgroup_id, fcid))? + { + fatcat_openapi::DeleteFilesetResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + WebCapture(fcid) => match self + .rt + .block_on(self.api.delete_webcapture(editgroup_id, fcid))? + { + fatcat_openapi::DeleteWebcaptureResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Editgroup(..) | Editor(..) => unimplemented!("deletion for this entity type"), + Changelog(..) => return Err(anyhow!("mutating this entity type doesn't make sense")), + EditorUsername(..) | ReleaseLookup(..) | ContainerLookup(..) | FileLookup(..) + | CreatorLookup(..) => return Err(anyhow!("into_entity_specifier() didn't work?")), + } + .with_context(|| format!("failed to delete {:?}", specifier)) + } + + pub fn create_entity_from_json( + &mut self, + entity_type: EntityType, + json_str: &str, + editgroup_id: String, + ) -> Result<models::EntityEdit> { + match entity_type { + EntityType::Release => { + match self.rt.block_on( + self.api + .create_release(editgroup_id, serde_json::from_str(&json_str)?), + )? { + fatcat_openapi::CreateReleaseResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + } + EntityType::Work => { + match self.rt.block_on( + self.api + .create_work(editgroup_id, serde_json::from_str(&json_str)?), + )? { + fatcat_openapi::CreateWorkResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + } + EntityType::Creator => { + match self.rt.block_on( + self.api + .create_creator(editgroup_id, serde_json::from_str(&json_str)?), + )? 
{ + fatcat_openapi::CreateCreatorResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + } + EntityType::Container => { + match self.rt.block_on( + self.api + .create_container(editgroup_id, serde_json::from_str(&json_str)?), + )? { + fatcat_openapi::CreateContainerResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + } + EntityType::File => { + match self.rt.block_on( + self.api + .create_file(editgroup_id, serde_json::from_str(&json_str)?), + )? { + fatcat_openapi::CreateFileResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + } + EntityType::FileSet => { + match self.rt.block_on( + self.api + .create_fileset(editgroup_id, serde_json::from_str(&json_str)?), + )? { + fatcat_openapi::CreateFilesetResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + } + EntityType::WebCapture => { + match self.rt.block_on( + self.api + .create_webcapture(editgroup_id, serde_json::from_str(&json_str)?), + )? 
{ + fatcat_openapi::CreateWebcaptureResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + } + } + .with_context(|| format!("parsing and creating {:?} entity", entity_type)) + } + + pub fn existing_edit_in_editgroup( + &mut self, + editgroup: &models::Editgroup, + specifier: &Specifier, + ) -> Option<models::EntityEdit> { + use Specifier::*; + let (fcid, edit_list) = match specifier.clone() { + Release(fcid) => (fcid, editgroup.edits.as_ref().unwrap().releases.clone()), + Work(fcid) => (fcid, editgroup.edits.as_ref().unwrap().works.clone()), + Container(fcid) => (fcid, editgroup.edits.as_ref().unwrap().containers.clone()), + Creator(fcid) => (fcid, editgroup.edits.as_ref().unwrap().creators.clone()), + File(fcid) => (fcid, editgroup.edits.as_ref().unwrap().files.clone()), + FileSet(fcid) => (fcid, editgroup.edits.as_ref().unwrap().filesets.clone()), + WebCapture(fcid) => (fcid, editgroup.edits.as_ref().unwrap().webcaptures.clone()), + EditorUsername(..) | ReleaseLookup(..) | ContainerLookup(..) | FileLookup(..) + | CreatorLookup(..) | Editgroup(..) | Editor(..) | Changelog(..) => { + panic!("this entity type doesn't exist in editgroups") + } + }; + for entity_edit in edit_list.unwrap() { + if entity_edit.ident == fcid { + return Some(entity_edit); + } + } + None + } + + pub fn delete_editgroup_edit( + &mut self, + editgroup: &models::Editgroup, + specifier: &Specifier, + edit: &models::EntityEdit, + ) -> Result<()> { + use Specifier::*; + let editgroup_id = editgroup.editgroup_id.clone().unwrap(); + let edit_id = edit.edit_id.clone(); + match specifier.clone() { + Release(..) => match self + .rt + .block_on(self.api.delete_release_edit(editgroup_id, edit_id))? + { + fatcat_openapi::DeleteReleaseEditResponse::DeletedEdit(..) => Ok(()), + other => Err(anyhow!("{:?}", other)), + }, + Work(..) => match self + .rt + .block_on(self.api.delete_work_edit(editgroup_id, edit_id))? + { + fatcat_openapi::DeleteWorkEditResponse::DeletedEdit(..) 
=> Ok(()), + other => Err(anyhow!("{:?}", other)), + }, + Container(..) => match self + .rt + .block_on(self.api.delete_container_edit(editgroup_id, edit_id))? + { + fatcat_openapi::DeleteContainerEditResponse::DeletedEdit(..) => Ok(()), + other => Err(anyhow!("{:?}", other)), + }, + Creator(..) => match self + .rt + .block_on(self.api.delete_creator_edit(editgroup_id, edit_id))? + { + fatcat_openapi::DeleteCreatorEditResponse::DeletedEdit(..) => Ok(()), + other => Err(anyhow!("{:?}", other)), + }, + File(..) => match self + .rt + .block_on(self.api.delete_file_edit(editgroup_id, edit_id))? + { + fatcat_openapi::DeleteFileEditResponse::DeletedEdit(..) => Ok(()), + other => Err(anyhow!("{:?}", other)), + }, + FileSet(..) => match self + .rt + .block_on(self.api.delete_fileset_edit(editgroup_id, edit_id))? + { + fatcat_openapi::DeleteFilesetEditResponse::DeletedEdit(..) => Ok(()), + other => Err(anyhow!("{:?}", other)), + }, + WebCapture(..) => match self + .rt + .block_on(self.api.delete_webcapture_edit(editgroup_id, edit_id))? + { + fatcat_openapi::DeleteWebcaptureEditResponse::DeletedEdit(..) => Ok(()), + other => Err(anyhow!("{:?}", other)), + }, + EditorUsername(..) | ReleaseLookup(..) | ContainerLookup(..) | FileLookup(..) + | CreatorLookup(..) | Editgroup(..) | Editor(..) | Changelog(..) => { + panic!("this entity type doesn't exist in editgroups") + } + } + } + + pub fn update_entity_from_json( + &mut self, + specifier: Specifier, + json_str: &str, + editgroup_id: String, + ) -> Result<models::EntityEdit> { + use Specifier::*; + let specifier = specifier.into_entity_specifier(self)?; + let eg = match self + .rt + .block_on(self.api.get_editgroup(editgroup_id.clone()))? 
+ { + fatcat_openapi::GetEditgroupResponse::Found(model) => Ok(model), + fatcat_openapi::GetEditgroupResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetEditgroupResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: editgroup_{:?}", editgroup_id)), + }?; + if let Some(entity_edit) = self.existing_edit_in_editgroup(&eg, &specifier) { + self.delete_editgroup_edit(&eg, &specifier, &entity_edit)?; + }; + match specifier.clone() { + Release(fcid) => match self.rt.block_on(self.api.update_release( + editgroup_id, + fcid, + serde_json::from_str(&json_str)?, + ))? { + fatcat_openapi::UpdateReleaseResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Work(fcid) => match self.rt.block_on(self.api.update_work( + editgroup_id, + fcid, + serde_json::from_str(&json_str)?, + ))? { + fatcat_openapi::UpdateWorkResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Container(fcid) => match self.rt.block_on(self.api.update_container( + editgroup_id, + fcid, + serde_json::from_str(&json_str)?, + ))? { + fatcat_openapi::UpdateContainerResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Creator(fcid) => match self.rt.block_on(self.api.update_creator( + editgroup_id, + fcid, + serde_json::from_str(&json_str)?, + ))? { + fatcat_openapi::UpdateCreatorResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + File(fcid) => match self.rt.block_on(self.api.update_file( + editgroup_id, + fcid, + serde_json::from_str(&json_str)?, + ))? { + fatcat_openapi::UpdateFileResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + FileSet(fcid) => match self.rt.block_on(self.api.update_fileset( + editgroup_id, + fcid, + serde_json::from_str(&json_str)?, + ))? 
{ + fatcat_openapi::UpdateFilesetResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + WebCapture(fcid) => match self.rt.block_on(self.api.update_webcapture( + editgroup_id, + fcid, + serde_json::from_str(&json_str)?, + ))? { + fatcat_openapi::UpdateWebcaptureResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Editgroup(..) | Editor(..) => unimplemented!("updates for this entity type"), + Changelog(..) => return Err(anyhow!("deleting this entity type doesn't make sense")), + EditorUsername(..) | ReleaseLookup(..) | ContainerLookup(..) | FileLookup(..) + | CreatorLookup(..) => return Err(anyhow!("into_entity_specifier() didn't work?")), + } + .with_context(|| format!("failed to update {:?}", specifier)) + } + + pub fn create_editgroup(&mut self, description: Option<String>) -> Result<models::Editgroup> { + let mut eg = models::Editgroup::new(); + eg.description = description; + eg.extra = Some({ + let mut extra = std::collections::HashMap::new(); + extra.insert( + "agent".to_string(), + serde_json::Value::String("fatcat-cli".to_string()), + // TODO: version? 
+ ); + extra + }); + let result = self.rt.block_on(self.api.create_editgroup(eg))?; + match result { + fatcat_openapi::CreateEditgroupResponse::SuccessfullyCreated(eg) => Ok(eg), + other => Err(anyhow!("{:?}", other)).context("failed to create editgroup"), + } + } + + pub fn accept_editgroup(&mut self, editgroup_id: String) -> Result<models::Success> { + let result = self + .rt + .block_on(self.api.accept_editgroup(editgroup_id.clone())) + .context("accept editgroup")?; + match result { + fatcat_openapi::AcceptEditgroupResponse::MergedSuccessfully(msg) => Ok(msg), + other => Err(anyhow!( + "failed to accept editgroup {}: {:?}", + editgroup_id, + other + )), + } + } +} diff --git a/fatcat-cli/src/commands.rs b/fatcat-cli/src/commands.rs new file mode 100644 index 0000000..15bfc81 --- /dev/null +++ b/fatcat-cli/src/commands.rs @@ -0,0 +1,491 @@ +use anyhow::{anyhow, Context, Result}; +use chrono_humanize::HumanTime; +use fatcat_openapi::models; +#[allow(unused_imports)] +use log::{self, debug, info}; +use std::convert::TryInto; +use std::fs::File; +use std::io::{self, BufRead, Write}; +use std::path::PathBuf; +use tabwriter::TabWriter; +use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; + +use crate::{ + entity_model_from_json_str, read_entity_file, ApiModelSer, EntityType, FatcatApiClient, + Mutation, Specifier, +}; + +// Want to show: +// - whether api_token found +// - configured api_host we are connecting to +// - whether we can connect to remote host (eg, get most recent changelog) +// - whether our auth is valid +// - current active editgroup +#[derive(Debug, PartialEq, Clone, serde::Serialize)] +pub struct ClientStatus { + pub has_api_token: bool, + pub api_host: String, + pub last_changelog: Option<i64>, + pub account: Option<models::Editor>, +} + +impl ClientStatus { + pub fn generate(api_client: &mut FatcatApiClient) -> Result<Self> { + let last_changelog = match api_client + .rt + .block_on(api_client.api.get_changelog(Some(1))) + 
{ + Ok(fatcat_openapi::GetChangelogResponse::Success(entry_vec)) => { + Some(entry_vec[0].index) + } + Ok(_) | Err(_) => None, + }; + let has_api_token = api_client.api_token.is_some(); + let account: Option<models::Editor> = if has_api_token && last_changelog.is_some() { + match api_client + .rt + .block_on(api_client.api.auth_check(None)) + .context("check auth token")? + { + fatcat_openapi::AuthCheckResponse::Success(_) => Ok(()), + fatcat_openapi::AuthCheckResponse::Forbidden(err) => { + Err(anyhow!("Forbidden ({}): {}", err.error, err.message)) + } + fatcat_openapi::AuthCheckResponse::NotAuthorized { body: err, .. } => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + resp => return Err(anyhow!("{:?}", resp)).context("auth check failed"), + } + .context("check auth token")?; + match api_client + .rt + .block_on( + api_client + .api + .get_editor(api_client.editor_id.as_ref().unwrap().to_string()), + ) + .context("fetching editor account info")? + { + fatcat_openapi::GetEditorResponse::Found(editor) => Some(editor), + fatcat_openapi::GetEditorResponse::NotFound(err) => { + return Err(anyhow!("Not Found: {}", err.message)) + } + resp => return Err(anyhow!("{:?}", resp)).context("editor fetch failed"), + } + } else { + None + }; + Ok(ClientStatus { + api_host: api_client.api_host.clone(), + has_api_token, + last_changelog, + account, + }) + } + + pub fn pretty_print(self) -> Result<()> { + let mut color_stdout = StandardStream::stdout(if atty::is(atty::Stream::Stdout) { + ColorChoice::Auto + } else { + ColorChoice::Never + }); + let color_normal = ColorSpec::new(); + let mut color_bold = ColorSpec::new(); + color_bold.set_bold(true); + let mut color_happy = ColorSpec::new(); + color_happy.set_fg(Some(Color::Green)).set_bold(true); + let mut color_sad = ColorSpec::new(); + color_sad.set_fg(Some(Color::Red)).set_bold(true); + + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "API host")?; + 
color_stdout.set_color(&color_bold)?; + write!(&mut color_stdout, "{}", self.api_host)?; + match self.last_changelog { + Some(index) => { + color_stdout.set_color(&color_happy)?; + writeln!(&mut color_stdout, " [successfully connected]")?; + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "Last changelog")?; + color_stdout.set_color(&color_bold)?; + writeln!(&mut color_stdout, "{}", index)?; + } + None => { + color_stdout.set_color(&color_sad)?; + writeln!(&mut color_stdout, " [Failed to connect]")?; + } + }; + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "API auth token")?; + if self.has_api_token { + color_stdout.set_color(&color_happy)?; + writeln!(&mut color_stdout, "[configured]")?; + } else { + color_stdout.set_color(&color_sad)?; + writeln!(&mut color_stdout, "[not configured]")?; + }; + if let Some(editor) = self.account { + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "Account")?; + color_stdout.set_color(&color_bold)?; + write!(&mut color_stdout, "{}", editor.username)?; + if editor.is_bot == Some(true) { + color_stdout + .set_color(ColorSpec::new().set_fg(Some(Color::Blue)).set_bold(true))?; + write!(&mut color_stdout, " [bot]")?; + } + if editor.is_admin == Some(true) { + color_stdout + .set_color(ColorSpec::new().set_fg(Some(Color::Magenta)).set_bold(true))?; + write!(&mut color_stdout, " [admin]")?; + } + match editor.is_active { + Some(true) => { + color_stdout.set_color(&color_happy)?; + writeln!(&mut color_stdout, " [active]")?; + } + Some(false) | None => { + color_stdout.set_color(&color_sad)?; + writeln!(&mut color_stdout, " [disabled]")?; + } + }; + color_stdout.set_color(&color_normal)?; + writeln!( + &mut color_stdout, + "{:>16} editor_{}", + "", + editor.editor_id.unwrap() + )?; + }; + color_stdout.set_color(&color_normal)?; + Ok(()) + } +} + +pub fn print_editgroups(eg_list: Vec<models::Editgroup>, json: bool) -> Result<()> { + if json { + 
for eg in eg_list { + writeln!(&mut std::io::stdout(), "{}", eg.to_json_string()?)?; + } + } else { + let mut tw = TabWriter::new(std::io::stdout()); + writeln!( + tw, + "editgroup_id\tchangelog_index\tcreated\tsubmitted\tdescription" + )?; + for eg in eg_list { + writeln!( + tw, + "{}\t{}\t{}\t{}\t{}", + eg.editgroup_id.unwrap(), + eg.changelog_index + .map_or("-".to_string(), |v| v.to_string()), + eg.created + .map_or("-".to_string(), |v| HumanTime::from(v).to_string()), + eg.submitted + .map_or("-".to_string(), |v| HumanTime::from(v).to_string()), + eg.description.unwrap_or_else(|| "-".to_string()) + )?; + } + tw.flush()?; + } + Ok(()) +} + +pub fn print_changelog_entries(entry_list: Vec<models::ChangelogEntry>, json: bool) -> Result<()> { + if json { + for entry in entry_list { + writeln!(&mut std::io::stdout(), "{}", entry.to_json_string()?)?; + } + } else { + let mut tw = TabWriter::new(std::io::stdout()); + writeln!(tw, "index\ttimestamp\teditor\teditgroup_description")?; + for entry in entry_list { + writeln!( + tw, + "{}\t{}\t{}\t{}", + entry.index, + HumanTime::from(entry.timestamp).to_string(), + entry + .editgroup + .as_ref() + .unwrap() + .editor + .as_ref() + .map_or("-".to_string(), |v| v.username.to_string()), + entry + .editgroup + .as_ref() + .unwrap() + .description + .as_ref() + .map_or("-".to_string(), |v| v.to_string()), + )?; + } + tw.flush()?; + } + Ok(()) +} + +pub fn print_entity_histories( + history_list: Vec<models::EntityHistoryEntry>, + json: bool, +) -> Result<()> { + if json { + for history in history_list { + writeln!(&mut std::io::stdout(), "{}", history.to_json_string()?)?; + } + } else { + let mut tw = TabWriter::new(std::io::stdout()); + writeln!( + tw, + "changelog_index\ttype\ttimestamp\teditor\teditgroup_description" + )?; + for history in history_list { + let state = match ( + history.edit.revision, + history.edit.prev_revision, + history.edit.redirect_ident, + ) { + (Some(_), None, None) => "create", + (Some(_), Some(_), 
None) => "update", + (None, _, None) => "delete", + (None, _, Some(_)) => "redirect", + _ => "-", + }; + writeln!( + tw, + "{}\t{}\t{}\t{}\t{}", + history.changelog_entry.index, + state, + HumanTime::from(history.changelog_entry.timestamp).to_string(), + history + .editgroup + .editor + .map_or("-".to_string(), |v| v.username.to_string()), + history + .editgroup + .description + .unwrap_or_else(|| "-".to_string()) + )?; + } + tw.flush()?; + } + Ok(()) +} + +pub fn edit_entity_locally( + api_client: &mut FatcatApiClient, + specifier: Specifier, + editgroup_id: String, + json: bool, + editing_command: String, +) -> Result<models::EntityEdit> { + // TODO: fetch editgroup, check if this entity is already being updated in it. If so, + // need to fetch that revision, do the edit, parse that synatx is good, then delete the + // existing edit and update with the new one. + let original_entity = specifier.get_from_api(api_client, None, None)?; + let exact_specifier = original_entity.specifier(); + let tmp_file = tempfile::Builder::new() + .suffix(if json { ".json" } else { ".toml" }) + .tempfile()?; + if json { + writeln!(&tmp_file, "{}", original_entity.to_json_string()?)? + } else { + writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)? 
+ } + let mut editor_cmd = std::process::Command::new(&editing_command) + .arg(tmp_file.path()) + .spawn() + .expect("failed to execute process"); + let cmd_status = editor_cmd.wait()?; + if !cmd_status.success() { + return Err(anyhow!( + "editor ({}) exited with non-success status code ({}), bailing on edit", + editing_command, + cmd_status + .code() + .map(|v| v.to_string()) + .unwrap_or_else(|| "N/A".to_string()) + )); + }; + let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?; + // for whatever reason api_client's TCP connection is broken after spawning, so try a + // dummy call, expected to fail, but connection should re-establish after this + specifier + .get_from_api(api_client, None, None) + .context("re-fetch") + .ok(); + let ee = api_client + .update_entity_from_json(exact_specifier, &json_str, editgroup_id) + .context("updating after edit")?; + Ok(ee) +} + +pub enum BatchOp { + Create, + Update, + Delete, +} + +pub struct BatchGrouper { + entity_type: EntityType, + batch_size: u64, + limit: Option<u64>, + auto_accept: bool, + editgroup_description: String, + current_count: u64, + current_editgroup_id: Option<String>, + total_count: u64, +} + +// Note: should be possible to add support for the single-call batch create endpoint by storing +// edits in a batch within this object. Might need to change control flow a bit. This optimization +// was mostly intended for bootstrapping of tens of thousands of entities, so not including for +// now. 
+impl BatchGrouper { + pub fn new( + entity_type: EntityType, + batch_size: u64, + limit: Option<u64>, + auto_accept: bool, + ) -> Self { + let editgroup_description = "part of a fatcat-cli batch operation".to_string(); + BatchGrouper { + entity_type, + batch_size, + limit, + auto_accept, + editgroup_description, + current_count: 0, + current_editgroup_id: None, + total_count: 0, + } + } + + pub fn run( + &mut self, + api_client: &mut FatcatApiClient, + input_path: Option<PathBuf>, + op: BatchOp, + mutations: Option<Vec<Mutation>>, + ) -> Result<()> { + match input_path { + None => { + let stdin = io::stdin(); + let stdin_lock = stdin.lock(); + let lines = stdin_lock.lines(); + for line in lines { + let json_str = line?; + match op { + BatchOp::Create => self.push_create(api_client, &json_str)?, + BatchOp::Update => self.push_update( + api_client, + &json_str, + mutations.clone().unwrap_or(vec![]), + )?, + BatchOp::Delete => self.push_delete(api_client, &json_str)?, + }; + if let Some(limit) = self.limit { + if self.total_count + self.current_count >= limit { + break; + } + } + } + } + Some(path) => { + let input_file = File::open(path)?; + let buffered = io::BufReader::new(input_file); + let lines = buffered.lines(); + for line in lines { + let json_str = line?; + match op { + BatchOp::Create => self.push_create(api_client, &json_str)?, + BatchOp::Update => self.push_update( + api_client, + &json_str, + mutations.clone().unwrap_or(vec![]), + )?, + BatchOp::Delete => self.push_delete(api_client, &json_str)?, + }; + if let Some(limit) = self.limit { + if self.total_count + self.current_count >= limit { + break; + } + } + } + } + } + self.flush(api_client)?; + Ok(()) + } + + pub fn push_create( + &mut self, + api_client: &mut FatcatApiClient, + json_str: &str, + ) -> Result<models::EntityEdit> { + let editgroup_id = self.increment_editgroup(api_client)?; + api_client.create_entity_from_json(self.entity_type, json_str, editgroup_id) + } + + pub fn push_update( + &mut 
self, + api_client: &mut FatcatApiClient, + json_str: &str, + mutations: Vec<Mutation>, + ) -> Result<models::EntityEdit> { + let obj: serde_json::Value = serde_json::from_str(json_str)?; + let ident = obj["ident"].as_str().unwrap(); // TODO: safer extraction of this ident? + let editgroup_id = self.increment_editgroup(api_client)?; + let mut entity = entity_model_from_json_str(self.entity_type, &json_str)?; + entity.mutate(mutations)?; + api_client.update_entity_from_json( + Specifier::from_ident(self.entity_type, ident.to_string()), + &entity.to_json_string()?, + editgroup_id, + ) + } + + pub fn push_delete( + &mut self, + api_client: &mut FatcatApiClient, + json_str: &str, + ) -> Result<models::EntityEdit> { + let obj: serde_json::Value = serde_json::from_str(json_str)?; + let ident = obj["ident"].as_str().unwrap(); // TODO: safer extraction of this ident? + let editgroup_id = self.increment_editgroup(api_client)?; + api_client.delete_entity( + Specifier::from_ident(self.entity_type, ident.to_string()), + editgroup_id, + ) + } + + pub fn increment_editgroup(&mut self, api_client: &mut FatcatApiClient) -> Result<String> { + if self.current_count >= self.batch_size.try_into().unwrap() { + self.flush(api_client)?; + }; + self.current_count += 1; + if let Some(eg) = &self.current_editgroup_id { + return Ok(eg.to_string()); + } + let eg = api_client.create_editgroup(Some(self.editgroup_description.clone()))?; + self.current_editgroup_id = eg.editgroup_id; + Ok(self.current_editgroup_id.as_ref().unwrap().to_string()) + } + + pub fn flush(&mut self, api_client: &mut FatcatApiClient) -> Result<()> { + if self.current_count > 0 && self.current_editgroup_id.is_some() { + let eg_id = self.current_editgroup_id.clone().unwrap(); + if self.auto_accept { + api_client.accept_editgroup(eg_id)?; + } else { + api_client.update_editgroup_submit(eg_id, true)?; + } + self.total_count += self.current_count; + self.current_count = 0; + self.current_editgroup_id = None; + } + Ok(()) + } 
/// Outcome of attempting to download a single file.
///
/// The `String` payloads carry a detail string (see [`DownloadStatus::details`]); the
/// `HttpError` payload is the HTTP status code.
#[derive(Debug, PartialEq, Clone)]
pub enum DownloadStatus {
    Exists(String),
    Downloaded(String),
    NetworkError(String),
    HttpError(u16),
    PartialExists(String),
    NoPublicFile,
    FileMissingMetadata,
    WrongSize,
    WrongHash,
}

impl DownloadStatus {
    /// Returns a copy of the detail string for variants that carry one, `None` otherwise.
    pub fn details(&self) -> Option<String> {
        match self {
            Self::Exists(detail)
            | Self::Downloaded(detail)
            | Self::NetworkError(detail)
            | Self::PartialExists(detail) => Some(detail.clone()),
            _ => None,
        }
    }
}

impl fmt::Display for DownloadStatus {
    /// Formats the status as a short machine-friendly slug (e.g. `success`, `http-404`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let slug = match self {
            // The only variant whose slug embeds its payload.
            Self::HttpError(code) => return write!(f, "http-{}", code),
            Self::Exists(_) => "exists",
            Self::Downloaded(_) => "success",
            Self::NetworkError(_) => "network-error",
            Self::PartialExists(_) => "partial-exists",
            Self::NoPublicFile => "no-public-file",
            Self::FileMissingMetadata => "missing-file-metadata",
            Self::WrongSize => "wrong-file-size",
            Self::WrongHash => "wrong-file-hash",
        };
        f.write_str(slug)
    }
}
== 14 { + segments[1] = format!("{}id_", segments[1]); + } + Ok(url) +} + +/// Attempts to download a file entity, including verifying checksum. +pub fn download_file(fe: &FileEntity) -> Result<DownloadStatus> { + let sha1hex = match &fe.sha1 { + Some(v) => v, + None => return Ok(DownloadStatus::FileMissingMetadata), + }; + let expected_size = match fe.size { + Some(v) => v as u64, + None => return Ok(DownloadStatus::FileMissingMetadata), + }; + + let file_suffix = match fe.mimetype.as_ref().map(String::as_str) { + Some("application/pdf") => ".pdf", + Some("application/postscript") => ".pdf", + Some("text/html") => ".html", + Some("text/xml") => ".xml", + _ => ".unknown", + }; + + // TODO: output directory + let path_string = format!("{}{}", sha1hex, file_suffix); + let final_path = Path::new(&path_string); + + // NOTE: this isn't perfect; there could have been a race condition + if final_path.exists() { + return Ok(DownloadStatus::Exists( + final_path.to_string_lossy().to_string(), + )); + }; + + let path_string = format!("{}{}.partial", sha1hex, file_suffix); + let download_path = Path::new(&path_string); + + // TODO: only archive.org URLs (?) + let raw_url = match fe.urls.as_ref() { + None => return Ok(DownloadStatus::NoPublicFile), + Some(url_list) if url_list.len() == 0 => return Ok(DownloadStatus::NoPublicFile), + // TODO: remove clone (?) 
+ // TODO: better heuristic than "just try first URL" + Some(url_list) => url_list[0].url.clone(), + }; + + let mut url = Url::parse(&raw_url)?; + if url.host_str() == Some("web.archive.org") { + url = rewrite_wayback_url(url)?; + } + + let download_file = match std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(download_path) + { + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { + return Ok(DownloadStatus::PartialExists( + download_path.to_string_lossy().to_string(), + )) + } + Err(e) => return Err(e).context("opening temporary download file"), + Ok(f) => f, + }; + + // TODO: print to stderr + info!("downloading: {}", url); + let client = reqwest::blocking::Client::new(); + let mut resp = match client + .get(url) + .header(USER_AGENT, "fatcat-cli/0.0.0") + .send() + { + Ok(r) => r, + Err(e) => { + std::fs::remove_file(download_path)?; + return Ok(DownloadStatus::NetworkError(format!("{:?}", e))); + } + }; + + // TODO: parse headers, eg size (?) + if !resp.status().is_success() { + std::fs::remove_file(download_path)?; + return Ok(DownloadStatus::HttpError(resp.status().as_u16())); + } + + // TODO: what if no filesize? 
+ // TODO: compare with resp.content_length(() -> Option<u64> + let pb = ProgressBar::new(fe.size.unwrap() as u64); + let out_size = match resp.copy_to(&mut pb.wrap_write(download_file)) { + Ok(r) => r, + Err(e) => { + std::fs::remove_file(download_path)?; + return Ok(DownloadStatus::NetworkError(format!("{:?}", e))); + } + }; + + if out_size != expected_size { + std::fs::remove_file(download_path)?; + return Ok(DownloadStatus::WrongSize); + } + + std::fs::rename(download_path, final_path)?; + Ok(DownloadStatus::Downloaded( + final_path.to_string_lossy().to_string(), + )) +} + +pub fn download_release(re: &ReleaseEntity) -> Result<DownloadStatus> { + let file_entities = match &re.files { + None => { + return Err(anyhow!( + "expected file sub-entities to be 'expanded' on release" + )) + } + Some(list) => list, + }; + let mut status = DownloadStatus::NoPublicFile; + for fe in file_entities { + status = download_file(&fe)?; + match status { + DownloadStatus::Exists(_) | DownloadStatus::Downloaded(_) => break, + _ => (), + }; + } + Ok(status) +} + +/// Tries either file or release +fn download_entity(json_str: String) -> Result<DownloadStatus> { + let release_attempt = serde_json::from_str::<ReleaseEntity>(&json_str); + if let Ok(re) = release_attempt { + if re.ident.is_some() && (re.title.is_some() || re.files.is_some()) { + let status = download_release(&re)?; + println!( + "release_{}\t{}\t{}", + re.ident.unwrap(), + status, + status.details().unwrap_or("".to_string()) + ); + return Ok(status); + }; + } + let file_attempt = + serde_json::from_str::<FileEntity>(&json_str).context("parsing entity for download"); + match file_attempt { + Ok(fe) => { + if fe.ident.is_some() && fe.urls.is_some() { + let status = download_file(&fe)?; + println!( + "file_{}\t{}\t{}", + fe.ident.unwrap(), + status, + status.details().unwrap_or("".to_string()) + ); + return Ok(status); + } else { + Err(anyhow!("not a file entity (JSON)")) + } + } + Err(e) => Err(e), + } +} + +pub fn 
download_batch(input_path: Option<PathBuf>, limit: Option<u64>) -> Result<u64> { + let count = 0; + match input_path { + None => { + let stdin = io::stdin(); + let stdin_lock = stdin.lock(); + let lines = stdin_lock.lines(); + for line in lines { + let json_str = line?; + download_entity(json_str)?; + if let Some(limit) = limit { + if count >= limit { + break; + } + } + } + } + Some(path) => { + let input_file = File::open(path)?; + let buffered = io::BufReader::new(input_file); + let lines = buffered.lines(); + for line in lines { + let json_str = line?; + download_entity(json_str)?; + if let Some(limit) = limit { + if count >= limit { + break; + } + } + } + } + }; + Ok(count) +} diff --git a/fatcat-cli/src/entities.rs b/fatcat-cli/src/entities.rs new file mode 100644 index 0000000..d61f6dc --- /dev/null +++ b/fatcat-cli/src/entities.rs @@ -0,0 +1,411 @@ +use crate::{EntityType, Specifier}; +use anyhow::{anyhow, Context, Result}; +use fatcat_openapi::models; +use lazy_static::lazy_static; +use log::{self, info}; +use regex::Regex; +use std::io::{BufRead, Read}; +use std::path::PathBuf; +use std::str::FromStr; + +#[derive(Debug, PartialEq, Clone)] +pub struct Mutation { + field: String, + value: Option<String>, +} + +impl FromStr for Mutation { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + // first try simple entity prefixes + lazy_static! { + static ref MUTATE_ENTITY_RE: Regex = Regex::new(r"^([a-z_]+)=(.*)$").unwrap(); + } + if let Some(captures) = MUTATE_ENTITY_RE.captures(s) { + return Ok(Mutation { + field: captures[1].to_string(), + value: match &captures[2] { + "" => None, + val => Some(val.to_string()), + }, + }); + } + Err(anyhow!("not a field mutation: {}", s)) + } +} + +/* + * Goal is to have traits around API entities. 
Things we'll want to do on concrete entities: + * + * - print, or pretty-print, as JSON or TOML + * - get fcid (or, self-specifier) + * - update (mutate or return copy) fields based on parameters + * - update self to remote API + * + * Methods that might return trait objects: + * + * - get by specifier + */ + +pub trait ApiEntityModel: ApiModelSer + ApiModelIdent + ApiModelMutate {} + +impl ApiEntityModel for models::ReleaseEntity {} +impl ApiEntityModel for models::ContainerEntity {} +impl ApiEntityModel for models::CreatorEntity {} +impl ApiEntityModel for models::WorkEntity {} +impl ApiEntityModel for models::FileEntity {} +impl ApiEntityModel for models::FilesetEntity {} +impl ApiEntityModel for models::WebcaptureEntity {} +impl ApiEntityModel for models::Editor {} +impl ApiEntityModel for models::Editgroup {} +impl ApiEntityModel for models::ChangelogEntry {} + +pub fn read_entity_file(input_path: Option<PathBuf>) -> Result<String> { + // treat "-" as "use stdin" + let input_path = match input_path { + Some(s) if s.to_string_lossy() == "-" => None, + _ => input_path, + }; + match input_path { + None => { + let mut line = String::new(); + std::io::stdin().read_line(&mut line)?; + Ok(line) + } + Some(path) if path.extension().map(|v| v.to_str()) == Some(Some("toml")) => { + info!("reading {:?} as TOML", path); + // as a hack, read TOML but then serialize it back to JSON + let mut contents = String::new(); + let mut input_file = + std::fs::File::open(path).context("reading entity from TOML file")?; + input_file.read_to_string(&mut contents)?; + let value: toml::Value = contents.parse().context("parsing TOML file")?; + Ok(serde_json::to_string(&value)?) 
+ } + Some(path) => { + let mut line = String::new(); + let input_file = std::fs::File::open(path)?; + let mut buffered = std::io::BufReader::new(input_file); + buffered.read_line(&mut line)?; + Ok(line) + } + } +} + +pub fn entity_model_from_json_str( + entity_type: EntityType, + json_str: &str, +) -> Result<Box<dyn ApiEntityModel>> { + match entity_type { + EntityType::Release => Ok(Box::new(serde_json::from_str::<models::ReleaseEntity>( + &json_str, + )?)), + EntityType::Work => Ok(Box::new(serde_json::from_str::<models::WorkEntity>( + &json_str, + )?)), + EntityType::Container => Ok(Box::new(serde_json::from_str::<models::ContainerEntity>( + &json_str, + )?)), + EntityType::Creator => Ok(Box::new(serde_json::from_str::<models::CreatorEntity>( + &json_str, + )?)), + EntityType::File => Ok(Box::new(serde_json::from_str::<models::FileEntity>( + &json_str, + )?)), + EntityType::FileSet => Ok(Box::new(serde_json::from_str::<models::FilesetEntity>( + &json_str, + )?)), + EntityType::WebCapture => Ok(Box::new(serde_json::from_str::<models::WebcaptureEntity>( + &json_str, + )?)), + } +} + +pub trait ApiModelSer { + fn to_json_string(&self) -> Result<String>; + fn to_toml_string(&self) -> Result<String>; +} + +impl<T: serde::Serialize> ApiModelSer for T { + fn to_json_string(&self) -> Result<String> { + Ok(serde_json::to_string(self)?) + } + + fn to_toml_string(&self) -> Result<String> { + Ok(toml::Value::try_from(self)?.to_string()) + } +} + +pub trait ApiModelIdent { + fn specifier(&self) -> Specifier; +} + +macro_rules! 
generic_entity_specifier { + ($specifier_type:ident) => { + fn specifier(&self) -> Specifier { + if let Some(fcid) = &self.ident { + Specifier::$specifier_type(fcid.to_string()) + } else { + panic!("expected full entity") + } + } + }; +} + +impl ApiModelIdent for models::ReleaseEntity { + generic_entity_specifier!(Release); +} +impl ApiModelIdent for models::ContainerEntity { + generic_entity_specifier!(Container); +} +impl ApiModelIdent for models::CreatorEntity { + generic_entity_specifier!(Creator); +} +impl ApiModelIdent for models::WorkEntity { + generic_entity_specifier!(Work); +} +impl ApiModelIdent for models::FileEntity { + generic_entity_specifier!(File); +} +impl ApiModelIdent for models::FilesetEntity { + generic_entity_specifier!(FileSet); +} +impl ApiModelIdent for models::WebcaptureEntity { + generic_entity_specifier!(WebCapture); +} + +impl ApiModelIdent for models::ChangelogEntry { + fn specifier(&self) -> Specifier { + Specifier::Changelog(self.index) + } +} + +impl ApiModelIdent for models::Editgroup { + fn specifier(&self) -> Specifier { + if let Some(fcid) = &self.editgroup_id { + Specifier::Editgroup(fcid.to_string()) + } else { + panic!("expected full entity") + } + } +} + +impl ApiModelIdent for models::Editor { + fn specifier(&self) -> Specifier { + if let Some(fcid) = &self.editor_id { + Specifier::Editor(fcid.to_string()) + } else { + panic!("expected full entity") + } + } +} + +pub trait ApiModelMutate { + fn mutate(&mut self, mutations: Vec<Mutation>) -> Result<()>; +} + +impl ApiModelMutate for models::ReleaseEntity { + fn mutate(&mut self, mutations: Vec<Mutation>) -> Result<()> { + for m in mutations { + match (m.field.as_str(), m.value) { + ("title", val) => { + self.title = val; + } + ("subtitle", val) => { + self.subtitle = val; + } + ("container_id", val) => { + self.container_id = val; + } + ("work_id", val) => { + self.work_id = val; + } + ("release_type", val) => { + self.release_type = val; + } + ("release_stage", val) => { + 
self.release_stage = val; + } + ("withdrawn_status", val) => { + self.withdrawn_status = val; + } + ("license_slug", val) => { + self.license_slug = val; + } + ("volume", val) => { + self.volume = val; + } + ("issue", val) => { + self.issue = val; + } + ("number", val) => { + self.number = val; + } + ("publisher", val) => { + self.publisher = val; + } + ("language", val) => { + self.language = val; + } + (field, _) => unimplemented!("setting field {} on a release", field), + } + } + Ok(()) + } +} + +impl ApiModelMutate for models::ContainerEntity { + fn mutate(&mut self, mutations: Vec<Mutation>) -> Result<()> { + for m in mutations { + match (m.field.as_str(), m.value) { + ("name", val) => { + self.name = val; + } + ("container_type", val) => { + self.container_type = val; + } + ("publisher", val) => { + self.publisher = val; + } + ("issnl", val) => { + self.issnl = val; + } + (field, _) => unimplemented!("setting field {} on a container", field), + } + } + Ok(()) + } +} + +impl ApiModelMutate for models::CreatorEntity { + fn mutate(&mut self, mutations: Vec<Mutation>) -> Result<()> { + for m in mutations { + match (m.field.as_str(), m.value) { + ("display_name", val) => { + self.display_name = val; + } + ("given_name", val) => { + self.given_name = val; + } + ("surname", val) => { + self.surname = val; + } + (field, _) => unimplemented!("setting field {} on a creator", field), + } + } + Ok(()) + } +} + +impl ApiModelMutate for models::WorkEntity { + fn mutate(&mut self, _mutations: Vec<Mutation>) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::FileEntity { + fn mutate(&mut self, mutations: Vec<Mutation>) -> Result<()> { + for m in mutations { + match (m.field.as_str(), m.value) { + ("size", Some(val)) => { + self.size = Some(i64::from_str(&val)?); + } + ("size", None) => { + self.size = None; + } + ("md5", val) => { + self.md5 = val; + } + ("sha1", val) => { + self.sha1 = val; + } + ("sha256", val) => { + self.sha256 = val; 
+ } + ("mimetype", val) => { + self.mimetype = val; + } + (field, _) => unimplemented!("setting field {} on a file", field), + } + } + Ok(()) + } +} + +impl ApiModelMutate for models::FilesetEntity { + fn mutate(&mut self, _mutations: Vec<Mutation>) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::WebcaptureEntity { + fn mutate(&mut self, _mutations: Vec<Mutation>) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::Editor { + fn mutate(&mut self, mutations: Vec<Mutation>) -> Result<()> { + for m in mutations { + match (m.field.as_str(), m.value) { + ("username", Some(val)) => { + self.username = val; + } + (field, _) => unimplemented!("setting field {} on an editor", field), + } + } + Ok(()) + } +} + +impl ApiModelMutate for models::Editgroup { + fn mutate(&mut self, mutations: Vec<Mutation>) -> Result<()> { + for m in mutations { + match (m.field.as_str(), m.value) { + ("description", val) => { + self.description = val; + } + (field, _) => unimplemented!("setting field {} on an editgroup", field), + } + } + Ok(()) + } +} + +impl ApiModelMutate for models::ChangelogEntry { + fn mutate(&mut self, _mutations: Vec<Mutation>) -> Result<()> { + unimplemented!("mutations") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mutation_from_str() -> () { + assert!(Mutation::from_str("release_asdf").is_err()); + assert_eq!( + Mutation::from_str("title=blah").unwrap(), + Mutation { + field: "title".to_string(), + value: Some("blah".to_string()) + } + ); + assert_eq!( + Mutation::from_str("title=").unwrap(), + Mutation { + field: "title".to_string(), + value: None + } + ); + assert_eq!( + Mutation::from_str("title=string with spaces and stuff").unwrap(), + Mutation { + field: "title".to_string(), + value: Some("string with spaces and stuff".to_string()) + } + ); + } +} diff --git a/fatcat-cli/src/lib.rs b/fatcat-cli/src/lib.rs new file mode 100644 index 0000000..d648c1c --- 
/dev/null +++ b/fatcat-cli/src/lib.rs @@ -0,0 +1,93 @@ +use anyhow::{anyhow, Context, Result}; +use data_encoding::BASE64; +use macaroon::{Macaroon, Verifier}; +use std::path::PathBuf; +use std::str::FromStr; + +mod api; +mod commands; +mod download; +mod entities; +mod search; +mod specifier; + +pub use api::FatcatApiClient; +pub use commands::{ + edit_entity_locally, print_changelog_entries, print_editgroups, print_entity_histories, + BatchGrouper, BatchOp, ClientStatus, +}; +pub use download::{download_batch, download_file, download_release}; +pub use entities::{ + entity_model_from_json_str, read_entity_file, ApiEntityModel, ApiModelIdent, ApiModelSer, + Mutation, +}; +pub use search::crude_search; +pub use specifier::Specifier; + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum EntityType { + Release, + Work, + Container, + Creator, + File, + FileSet, + WebCapture, +} + +impl FromStr for EntityType { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + "release" | "releases" => Ok(EntityType::Release), + "work" | "works" => Ok(EntityType::Work), + "container" | "containers" => Ok(EntityType::Container), + "creator" | "creators" => Ok(EntityType::Creator), + "file" | "files" => Ok(EntityType::File), + "fileset" | "filesets" => Ok(EntityType::FileSet), + "webcapture" | "webcaptures" => Ok(EntityType::WebCapture), + _ => Err(anyhow!("invalid entity type : {}", s)), + } + } +} + +/// Takes a macaroon token (as base64-encoded string) and tries to parse out an editor id +pub fn parse_macaroon_editor_id(s: &str) -> Result<String> { + let raw = BASE64 + .decode(s.as_bytes()) + .context("macaroon parsing failed")?; + let mac = Macaroon::deserialize(&raw) + .map_err(|err| anyhow!("macaroon deserialization failed: {:?}", err))?; + let mac = mac + .validate() + .map_err(|err| anyhow!("macaroon validation failed: {:?}", err))?; + let mut verifier = Verifier::new(); + let mut editor_id: Option<String> = None; + for caveat in 
/// Normalizes an optional input path: a path that is exactly "-" means "use stdin" and is
/// turned into `None`; every other value is returned unchanged.
pub fn path_or_stdin(raw: Option<PathBuf>) -> Option<PathBuf> {
    // treat "-" as "use stdin"
    raw.filter(|candidate| candidate.as_os_str() != "-")
}
Passing `-v` one time also prints + /// warnings, `-vv` enables info logging, `-vvv` debug, and `-vvvv` trace. + #[structopt(global = true, long, short = "v", parse(from_occurrences))] + verbose: i8, + + #[structopt(subcommand)] + cmd: Command, +} + +#[derive(StructOpt)] +enum EditgroupCommand { + /// Create a new editgroup + Create { + #[structopt(long, short)] + description: String, + }, + /// Print editgroups for current user + List { + #[structopt(long = "--editor-id", short)] + editor_id: Option<String>, + + #[structopt(long, short = "-n", default_value = "20")] + limit: i64, + + #[structopt(long)] + json: bool, + }, + /// Print recent editgroups from any user which need review + Reviewable { + #[structopt(long, short = "-n", default_value = "20")] + limit: i64, + + #[structopt(long)] + json: bool, + }, + /// Accept (merge) a single editgroup + Accept { + #[structopt(env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, + /// Submit a single editgroup for review + Submit { + #[structopt(env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, + /// Un-submit a single editgroup (for more editing) + Unsubmit { + #[structopt(env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, +} + +#[derive(StructOpt)] +enum BatchCommand { + Create { + entity_type: EntityType, + + #[structopt(long, default_value = "50")] + batch_size: u64, + + #[structopt(long)] + auto_accept: bool, + }, + Update { + entity_type: EntityType, + mutations: Vec<Mutation>, + + #[structopt(long, default_value = "50")] + batch_size: u64, + + #[structopt(long)] + auto_accept: bool, + }, + Delete { + entity_type: EntityType, + + #[structopt(long, default_value = "50")] + batch_size: u64, + + #[structopt(long)] + auto_accept: bool, + }, + Download {}, +} + +#[derive(StructOpt)] +enum Command { + Get { + specifier: Specifier, + + #[structopt(long = "--expand")] + expand: Option<String>, + + #[structopt(long = "--hide")] + hide: 
Option<String>, + + #[structopt(long)] + json: bool, + + #[structopt(long)] + toml: bool, + }, + Create { + entity_type: EntityType, + + /// Input file, "-" for stdin. + #[structopt(long = "--file", short = "-f", parse(from_os_str))] + input_path: Option<PathBuf>, + + #[structopt( + long = "--editgroup-id", + short, + env = "FATCAT_EDITGROUP", + hide_env_values = true + )] + editgroup_id: String, + }, + Update { + specifier: Specifier, + + /// Input file, "-" for stdin. + #[structopt(long = "--file", short = "-f", parse(from_os_str))] + input_path: Option<PathBuf>, + + #[structopt( + long = "--editgroup-id", + short, + env = "FATCAT_EDITGROUP", + hide_env_values = true + )] + editgroup_id: String, + + mutations: Vec<Mutation>, + }, + Delete { + specifier: Specifier, + + #[structopt( + long = "--editgroup-id", + short, + env = "FATCAT_EDITGROUP", + hide_env_values = true + )] + editgroup_id: String, + }, + Edit { + specifier: Specifier, + + #[structopt( + long = "--editgroup-id", + short, + env = "FATCAT_EDITGROUP", + hide_env_values = true + )] + editgroup_id: String, + + #[structopt(long)] + json: bool, + + #[allow(dead_code)] + #[structopt(long)] + toml: bool, + + #[structopt(long = "--editing-command", env = "EDITOR")] + editing_command: String, + }, + Download { + specifier: Specifier, + }, + History { + specifier: Specifier, + + #[structopt(long, short = "-n", default_value = "100")] + limit: u64, + + #[structopt(long)] + json: bool, + }, + Search { + entity_type: EntityType, + + terms: Vec<String>, + + #[structopt(long = "--expand")] + expand: Option<String>, + + #[structopt(long = "--hide")] + hide: Option<String>, + + #[structopt(long, short = "-n", default_value = "20")] + limit: i64, + + #[structopt(long = "--search-schema")] + search_schema: bool, + }, + Editgroup { + #[structopt(subcommand)] + cmd: EditgroupCommand, + }, + Changelog { + #[structopt(long, short = "-n", default_value = "20")] + limit: i64, + + /* TODO: follow (streaming) mode for 
changelog + #[structopt(long, short = "-f")] + follow: bool, + */ + #[structopt(long)] + json: bool, + }, + Batch { + #[structopt(subcommand)] + cmd: BatchCommand, + + /// Input file, "-" for stdin. + #[structopt(long = "--file", short = "-f", parse(from_os_str))] + input_path: Option<PathBuf>, + + #[structopt(long)] + limit: Option<u64>, + }, + Status { + #[structopt(long)] + json: bool, + }, +} + +fn main() -> Result<()> { + let opt = Opt::from_args(); + + let log_level = match opt.verbose { + std::i8::MIN..=-1 => "none", + 0 => "error", + 1 => "warn", + 2 => "info", + 3 => "debug", + 4..=std::i8::MAX => "trace", + }; + // hyper logging is very verbose, so crank that down even if everything else is more verbose + let log_filter = format!("{},hyper=error", log_level); + env_logger::from_env(env_logger::Env::default().default_filter_or(log_filter)) + .format_timestamp(None) + .init(); + + debug!("Args parsed, starting up"); + + if let Err(err) = run(opt) { + // Be graceful about some errors + if let Some(io_err) = err.root_cause().downcast_ref::<std::io::Error>() { + if let std::io::ErrorKind::BrokenPipe = io_err.kind() { + // presumably due to something like writing to stdout and piped to `head -n10` and + // stdout was closed + debug!("got BrokenPipe error, assuming stdout closed as expected and exiting with success"); + std::process::exit(0); + } + } + let mut color_stderr = StandardStream::stderr(if atty::is(atty::Stream::Stderr) { + ColorChoice::Auto + } else { + ColorChoice::Never + }); + color_stderr.set_color(ColorSpec::new().set_fg(Some(Color::Red)).set_bold(true))?; + eprintln!("Error: {:?}", err); + color_stderr.set_color(&ColorSpec::new())?; + std::process::exit(1); + } + Ok(()) +} + +fn run(opt: Opt) -> Result<()> { + let mut api_client = FatcatApiClient::new(opt.api_host.clone(), opt.api_token.clone())?; + + match opt.cmd { + Command::Get { + specifier, + expand, + hide, + json, + toml, + } => { + let result = specifier.get_from_api(&mut api_client, 
expand, hide)?; + if toml { + writeln!(&mut std::io::stdout(), "{}", result.to_toml_string()?)? + } else if json || true { + writeln!(&mut std::io::stdout(), "{}", result.to_json_string()?)? + } + } + Command::Create { + entity_type, + input_path, + editgroup_id, + } => { + let json_str = read_entity_file(input_path)?; + let ee = api_client.create_entity_from_json(entity_type, &json_str, editgroup_id)?; + println!("{}", serde_json::to_string(&ee)?); + } + Command::Update { + specifier, + input_path, + editgroup_id, + mutations, + } => { + let (json_str, exact_specifier): (String, Specifier) = + match (&input_path, mutations.len()) { + // input path or no mutations: read from path or stdin + (Some(_), _) | (None, 0) => ( + read_entity_file(input_path)?, + specifier.into_entity_specifier(&mut api_client)?, + ), + // no input path *and* mutations: fetch from API + (None, _) => { + let mut entity = specifier.get_from_api(&mut api_client, None, None)?; + entity.mutate(mutations)?; + (entity.to_json_string()?, entity.specifier()) + } + }; + let ee = + api_client.update_entity_from_json(exact_specifier, &json_str, editgroup_id)?; + println!("{}", serde_json::to_string(&ee)?); + } + Command::Edit { + specifier, + editgroup_id, + json, + toml: _, + editing_command, + } => { + let ee = edit_entity_locally( + &mut api_client, + specifier, + editgroup_id, + json, + editing_command, + )?; + println!("{}", serde_json::to_string(&ee)?); + } + Command::Changelog { limit, json } => { + let resp = api_client + .rt + .block_on(api_client.api.get_changelog(Some(limit))) + .context("fetch recent changelogs")?; + match resp { + fatcat_openapi::GetChangelogResponse::Success(change_list) => { + print_changelog_entries(change_list, json)?; + } + other => { + return Err(anyhow!("{:?}", other)) + .with_context(|| format!("failed to fetch changelogs")) + } + } + } + Command::Batch { + cmd: + BatchCommand::Create { + entity_type, + batch_size, + auto_accept, + }, + input_path, + limit, + } => 
{ + let input_path = path_or_stdin(input_path); + let mut batch = BatchGrouper::new(entity_type, batch_size, limit, auto_accept); + batch.run(&mut api_client, input_path, BatchOp::Create, None)?; + } + Command::Batch { + cmd: + BatchCommand::Update { + entity_type, + mutations, + batch_size, + auto_accept, + }, + input_path, + limit, + } => { + let input_path = path_or_stdin(input_path); + let mut batch = BatchGrouper::new(entity_type, batch_size, limit, auto_accept); + batch.run( + &mut api_client, + input_path, + BatchOp::Update, + Some(mutations), + )?; + } + Command::Batch { + cmd: + BatchCommand::Delete { + entity_type, + batch_size, + auto_accept, + }, + input_path, + limit, + } => { + let input_path = path_or_stdin(input_path); + let mut batch = BatchGrouper::new(entity_type, batch_size, limit, auto_accept); + batch.run(&mut api_client, input_path, BatchOp::Delete, None)?; + } + Command::Batch { + cmd: BatchCommand::Download {}, + input_path, + limit, + } => { + let input_path = path_or_stdin(input_path); + download_batch(input_path, limit)?; + } + Command::Download { specifier } => { + // run lookups if necessary (inefficient) + let specifier = match specifier { + Specifier::ReleaseLookup(_, _) | Specifier::FileLookup(_, _) => { + specifier.into_entity_specifier(&mut api_client)? 
+ } + _ => specifier, + }; + let status = match specifier { + Specifier::Release(ident) => { + let result = api_client.rt.block_on(api_client.api.get_release( + ident.clone(), + Some("files".to_string()), + Some("abstracts,refs".to_string()), + ))?; + let release_entity = match result { + fatcat_openapi::GetReleaseResponse::FoundEntity(model) => Ok(model), + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", ident)), + }?; + download_release(&release_entity) + } + Specifier::File(ident) => { + let result = api_client.rt.block_on(api_client.api.get_file( + ident.clone(), + None, + None, + ))?; + let file_entity = match result { + fatcat_openapi::GetFileResponse::FoundEntity(model) => Ok(model), + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", ident)), + }?; + download_file(&file_entity) + } + other => Err(anyhow!("Don't know how to download: {:?}", other)), + }?; + if let Some(detail) = status.details() { + println!("{}: {}", status, detail); + } else { + println!("{}", status); + } + } + Command::Search { + entity_type, + terms, + limit, + search_schema, + expand, + hide, + } => { + let limit: Option<u64> = match limit { + l if l <= 0 => None, + l => Some(l as u64), + }; + let results = fatcat_cli::crude_search(&opt.search_host, entity_type, limit, terms) + .with_context(|| format!("searching for {:?}", entity_type))?; + eprintln!("Got {} hits in {}ms", results.count, results.took_ms); + for hit in results { + let hit = hit?; + match (search_schema, entity_type) { + (true, _) => writeln!(&mut std::io::stdout(), "{}", hit.to_string())?, + (false, EntityType::Release) => { + let specifier = + Specifier::Release(hit["ident"].as_str().unwrap().to_string()); + let entity = specifier.get_from_api( + &mut api_client, + expand.clone(), + hide.clone(), + )?; + writeln!(&mut std::io::stdout(), "{}", entity.to_json_string()?)? 
+ } + (false, _) => unimplemented!("searching other entity types"), + } + } + } + Command::Delete { + specifier, + editgroup_id, + } => { + let result = api_client + .delete_entity(specifier.clone(), editgroup_id) + .with_context(|| format!("delete entity: {:?}", specifier))?; + println!("{}", serde_json::to_string(&result)?); + } + Command::History { + specifier, + limit, + json, + } => { + let specifier = specifier.into_entity_specifier(&mut api_client)?; + let history_entries = specifier.get_history(&mut api_client, Some(limit))?; + print_entity_histories(history_entries, json)?; + } + Command::Editgroup { + cmd: + EditgroupCommand::List { + editor_id, + limit, + json, + }, + } => { + let editor_id = match editor_id.or(api_client.editor_id) { + Some(eid) => eid, + None => return Err(anyhow!("require either working auth token or --editor-id")), + }; + let result = api_client + .rt + .block_on(api_client.api.get_editor_editgroups( + editor_id.clone(), + Some(limit), + None, + None, + )) + .context("fetch editgroups")?; + match result { + fatcat_openapi::GetEditorEditgroupsResponse::Found(eg_list) => { + print_editgroups(eg_list, json)?; + } + other => { + return Err(anyhow!("{:?}", other)).with_context(|| { + format!("failed to fetch editgroups for editor_{}", editor_id) + }) + } + } + } + Command::Editgroup { + cmd: EditgroupCommand::Reviewable { limit, json }, + } => { + let result = api_client + .rt + .block_on(api_client.api.get_editgroups_reviewable( + Some("editors".to_string()), + Some(limit), + None, + None, + )) + .context("fetch reviewable editgroups")?; + match result { + fatcat_openapi::GetEditgroupsReviewableResponse::Found(eg_list) => { + print_editgroups(eg_list, json)?; + } + other => { + return Err(anyhow!("{:?}", other)) + .context("failed to fetch reviewable editgroups") + } + } + } + Command::Editgroup { + cmd: EditgroupCommand::Create { description }, + } => { + let eg = api_client.create_editgroup(Some(description))?; + println!("{}", 
serde_json::to_string(&eg)?) + } + Command::Editgroup { + cmd: EditgroupCommand::Accept { editgroup_id }, + } => { + let msg = api_client.accept_editgroup(editgroup_id.clone())?; + println!("{}", serde_json::to_string(&msg)?); + } + Command::Editgroup { + cmd: EditgroupCommand::Submit { editgroup_id }, + } => { + let eg = api_client.update_editgroup_submit(editgroup_id, true)?; + println!("{}", eg.to_json_string()?); + } + Command::Editgroup { + cmd: EditgroupCommand::Unsubmit { editgroup_id }, + } => { + let eg = api_client.update_editgroup_submit(editgroup_id, false)?; + println!("{}", eg.to_json_string()?); + } + Command::Status { json } => { + let status = ClientStatus::generate(&mut api_client)?; + if json { + println!("{}", serde_json::to_string(&status)?) + } else { + status.pretty_print()?; + } + } + } + Ok(()) +} diff --git a/fatcat-cli/src/search.rs b/fatcat-cli/src/search.rs new file mode 100644 index 0000000..f778477 --- /dev/null +++ b/fatcat-cli/src/search.rs @@ -0,0 +1,186 @@ +use crate::EntityType; +use anyhow::{anyhow, Result}; +use log::{self, info}; +use serde_json::json; +use std::time::Duration; + +pub struct SearchResults { + pub entity_type: EntityType, + pub limit: Option<u64>, + pub count: u64, + pub took_ms: u64, + offset: u64, + batch: Vec<serde_json::Value>, + scroll_id: Option<String>, + scroll_url: String, + http_client: reqwest::blocking::Client, +} + +impl Iterator for SearchResults { + type Item = Result<serde_json::Value>; + + fn next(&mut self) -> Option<Result<serde_json::Value>> { + // if we already hit limit, bail early + if let Some(l) = self.limit { + if self.offset >= l { + return None; + } + } + // if current batch is empty, and we are scrolling, refill the current batch + if self.batch.is_empty() && self.scroll_id.is_some() { + let response = self + .http_client + .get(&self.scroll_url) + .header("Content-Type", "application/json") + .body( + json!({ + "scroll": "2m", + "scroll_id": self.scroll_id.clone().unwrap(), + }) + 
.to_string(), + ) + .send(); + let response = match response { + Err(e) => return Some(Err(e.into())), + Ok(v) => v, + }; + if !response.status().is_success() { + return Some(Err(anyhow!("search error, status={}", response.status()))); + }; + let body: serde_json::Value = match response.json() { + Err(e) => return Some(Err(e.into())), + Ok(v) => v, + }; + self.scroll_id = Some(body["_scroll_id"].as_str().unwrap().to_string()); + self.batch = body["hits"]["hits"].as_array().unwrap().to_vec(); + } + + // return next hit from the most recent batch + if !self.batch.is_empty() { + self.offset += 1; + let val = self.batch.pop().unwrap(); + let source = val["_source"].clone(); + return Some(Ok(source)); + } + + // if batch is empty and couldn't be refilled, terminate + // TODO: should we raise error if ended early? + None + } +} + +pub fn crude_search( + api_host: &str, + entity_type: EntityType, + limit: Option<u64>, + terms: Vec<String>, +) -> Result<SearchResults> { + let index = match entity_type { + EntityType::Release => "fatcat_release", + EntityType::File => "fatcat_file", + EntityType::Container => "fatcat_container", + _ => { + return Err(anyhow!( + "No search index for entity type: {:?}", + entity_type + )) + } + }; + let http_client = reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(10)) + .danger_accept_invalid_certs(true) + .build() + .expect("ERROR :: Could not build reqwest client"); + + let query: String = if terms.is_empty() { + "*".to_string() + } else { + terms.join(" ") + }; + info!("Search query string: {}", query); + let request_url = format!("{}/{}/_search", api_host, index); + let scroll_url = format!("{}/_search/scroll", api_host); + + // sort by _doc for (potentially) very large result sets + let (scroll_mode, sort_mode, size) = match limit { + None => (true, "_doc", 100), + Some(l) if l > 100 => (true, "_doc", 100), + Some(l) => (false, "_score", l), + }; + + let query_body = json!({ + "query": { + "boosting": { + "positive": 
{ + "bool": { + "must": { + "query_string": { + "query": query, + "default_operator": "AND", + "analyze_wildcard": true, + "allow_leading_wildcard": false, + "lenient": true, + "fields": [ + "title^2", + "biblio", + ], + }, + }, + "should": { + "term": { "in_ia": true }, + }, + }, + }, + "negative": { + "bool": { + "should": [ + {"bool": { "must_not" : { "exists": { "field": "title" }}}}, + {"bool": { "must_not" : { "exists": { "field": "year" }}}}, + {"bool": { "must_not" : { "exists": { "field": "type" }}}}, + {"bool": { "must_not" : { "exists": { "field": "stage" }}}}, + ], + }, + }, + "negative_boost": 0.5, + }, + }, + "size": size, + "sort": [ sort_mode ], + }) + .to_string(); + + let mut request = http_client + .get(&request_url) + .header("Content-Type", "application/json") + .body(query_body); + + if scroll_mode { + request = request.query(&[("scroll", "2m")]); + } + + let response = request.send()?; + + if !response.status().is_success() { + return Err(anyhow!("search error, status={}", response.status())); + } + //println!("{:?}", response); + let body: serde_json::Value = response.json()?; + + let scroll_id = if scroll_mode { + Some(body["_scroll_id"].as_str().unwrap().to_string()) + } else { + None + }; + + Ok(SearchResults { + entity_type, + limit, + count: body["hits"]["total"].as_u64().unwrap(), + took_ms: body["took"].as_u64().unwrap(), + offset: 0, + batch: body["hits"]["hits"].as_array().unwrap().to_vec(), + scroll_id, + scroll_url, + http_client, + }) +} diff --git a/fatcat-cli/src/specifier.rs b/fatcat-cli/src/specifier.rs new file mode 100644 index 0000000..0d8d209 --- /dev/null +++ b/fatcat-cli/src/specifier.rs @@ -0,0 +1,584 @@ +use crate::{ApiEntityModel, EntityType, FatcatApiClient}; +use anyhow::{anyhow, Context, Result}; +use lazy_static::lazy_static; +use regex::Regex; +use std::str::FromStr; + +#[derive(Debug, PartialEq, Clone)] +pub enum ReleaseLookupKey { + DOI, + PMCID, + PMID, + Arxiv, + // TODO: the others +} + +#[derive(Debug, 
PartialEq, Clone)] +pub enum ContainerLookupKey { + ISSNL, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum CreatorLookupKey { + Orcid, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum FileLookupKey { + SHA1, + SHA256, + MD5, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum Specifier { + Release(String), + ReleaseLookup(ReleaseLookupKey, String), + Work(String), + Container(String), + ContainerLookup(ContainerLookupKey, String), + Creator(String), + CreatorLookup(CreatorLookupKey, String), + File(String), + FileLookup(FileLookupKey, String), + FileSet(String), + WebCapture(String), + Editgroup(String), + Editor(String), + EditorUsername(String), + Changelog(i64), +} + +impl Specifier { + pub fn from_ident(entity_type: EntityType, ident: String) -> Specifier { + match entity_type { + EntityType::Release => Specifier::Release(ident), + EntityType::Work => Specifier::Work(ident), + EntityType::Container => Specifier::Container(ident), + EntityType::Creator => Specifier::Creator(ident), + EntityType::File => Specifier::File(ident), + EntityType::FileSet => Specifier::FileSet(ident), + EntityType::WebCapture => Specifier::WebCapture(ident), + } + } + + /// If this Specifier is a lookup, call the API to do the lookup and return the resulting + /// specific entity specifier (eg, with an FCID). If already specific, just pass through. 
+ pub fn into_entity_specifier(self, api_client: &mut FatcatApiClient) -> Result<Specifier> { + use Specifier::*; + match self { + Release(_) | Work(_) | Creator(_) | Container(_) | File(_) | FileSet(_) + | WebCapture(_) | Editgroup(_) | Editor(_) | Changelog(_) => Ok(self), + ReleaseLookup(_, _) => Ok(self.get_from_api(api_client, None, None)?.specifier()), + ContainerLookup(_, _) => Ok(self.get_from_api(api_client, None, None)?.specifier()), + CreatorLookup(_, _) => Ok(self.get_from_api(api_client, None, None)?.specifier()), + FileLookup(_, _) => Ok(self.get_from_api(api_client, None, None)?.specifier()), + EditorUsername(_username) => Err(anyhow!( + "editor lookup by username isn't implemented in fatcat-server API yet, sorry" + )), + } + } + + pub fn get_from_api( + &self, + api_client: &mut FatcatApiClient, + expand: Option<String>, + hide: Option<String>, + ) -> Result<Box<dyn ApiEntityModel>> { + use Specifier::*; + let ret: Result<Box<dyn ApiEntityModel>> = match self { + Release(fcid) => match api_client.rt.block_on(api_client.api.get_release( + fcid.to_string(), + expand, + hide, + ))? 
{ + fatcat_openapi::GetReleaseResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetReleaseResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetReleaseResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + ReleaseLookup(ext_id, key) => { + use ReleaseLookupKey::*; + let (doi, pmcid, pmid, arxiv) = ( + if let DOI = ext_id { + Some(key.to_string()) + } else { + None + }, + if let PMCID = ext_id { + Some(key.to_string()) + } else { + None + }, + if let PMID = ext_id { + Some(key.to_string()) + } else { + None + }, + if let Arxiv = ext_id { + Some(key.to_string()) + } else { + None + }, + ); + // doi, wikidata, isbn13, pmid, pmcid, core, arxiv, jstor, ark, mag + let result = api_client.rt.block_on(api_client.api.lookup_release( + doi, None, None, pmid, pmcid, None, arxiv, None, None, None, None, None, None, + expand, hide, + ))?; + match result { + fatcat_openapi::LookupReleaseResponse::FoundEntity(model) => { + Ok(Box::new(model)) + } + fatcat_openapi::LookupReleaseResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::LookupReleaseResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + } + } + Work(fcid) => match api_client.rt.block_on(api_client.api.get_work( + fcid.to_string(), + expand, + hide, + ))? 
{ + fatcat_openapi::GetWorkResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetWorkResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetWorkResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + Container(fcid) => match api_client.rt.block_on(api_client.api.get_container( + fcid.to_string(), + expand, + hide, + ))? { + fatcat_openapi::GetContainerResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetContainerResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetContainerResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + ContainerLookup(ext_id, key) => { + let result = api_client.rt.block_on(match ext_id { + ContainerLookupKey::ISSNL => { + api_client + .api + .lookup_container(Some(key.to_string()), None, expand, hide) + } + })?; + match result { + fatcat_openapi::LookupContainerResponse::FoundEntity(model) => { + Ok(Box::new(model)) + } + fatcat_openapi::LookupContainerResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::LookupContainerResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + } + } + Creator(fcid) => match api_client.rt.block_on(api_client.api.get_creator( + fcid.to_string(), + expand, + hide, + ))? 
{ + fatcat_openapi::GetCreatorResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetCreatorResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetCreatorResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + CreatorLookup(ext_id, key) => { + let result = api_client.rt.block_on(match ext_id { + CreatorLookupKey::Orcid => { + api_client + .api + .lookup_creator(Some(key.to_string()), None, expand, hide) + } + })?; + match result { + fatcat_openapi::LookupCreatorResponse::FoundEntity(model) => { + Ok(Box::new(model)) + } + fatcat_openapi::LookupCreatorResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::LookupCreatorResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + } + } + File(fcid) => match api_client.rt.block_on(api_client.api.get_file( + fcid.to_string(), + expand, + hide, + ))? 
{ + fatcat_openapi::GetFileResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetFileResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetFileResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + FileLookup(hash, key) => { + use FileLookupKey::*; + let (sha1, sha256, md5) = ( + if let SHA1 = hash { + Some(key.to_string()) + } else { + None + }, + if let SHA256 = hash { + Some(key.to_string()) + } else { + None + }, + if let MD5 = hash { + Some(key.to_string()) + } else { + None + }, + ); + let result = api_client + .rt + .block_on(api_client.api.lookup_file(sha1, sha256, md5, expand, hide))?; + match result { + fatcat_openapi::LookupFileResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::LookupFileResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::LookupFileResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + } + } + FileSet(fcid) => match api_client.rt.block_on(api_client.api.get_fileset( + fcid.to_string(), + expand, + hide, + ))? { + fatcat_openapi::GetFilesetResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetFilesetResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetFilesetResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + WebCapture(fcid) => match api_client.rt.block_on(api_client.api.get_webcapture( + fcid.to_string(), + expand, + hide, + ))? 
{ + fatcat_openapi::GetWebcaptureResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetWebcaptureResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetWebcaptureResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + Editgroup(fcid) => match api_client + .rt + .block_on(api_client.api.get_editgroup(fcid.to_string()))? + { + fatcat_openapi::GetEditgroupResponse::Found(model) => Ok(Box::new(model)), + fatcat_openapi::GetEditgroupResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetEditgroupResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + Editor(fcid) => match api_client + .rt + .block_on(api_client.api.get_editor(fcid.to_string()))? + { + fatcat_openapi::GetEditorResponse::Found(model) => Ok(Box::new(model)), + fatcat_openapi::GetEditorResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetEditorResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + Changelog(index) => match api_client + .rt + .block_on(api_client.api.get_changelog_entry(*index))? 
+ { + fatcat_openapi::GetChangelogEntryResponse::FoundChangelogEntry(model) => { + Ok(Box::new(model)) + } + fatcat_openapi::GetChangelogEntryResponse::BadRequest(err) => { + Err(anyhow!("Bad Request ({}): {}", err.error, err.message)) + } + fatcat_openapi::GetChangelogEntryResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + EditorUsername(_username) => { + unimplemented!( + "editor lookup by username isn't implemented in fatcat-server API yet, sorry" + ) + } + }; + match ret { + Ok(_) => ret, + Err(_) => ret.with_context(|| format!("Failed to GET {:?}", self)), + } + } + + pub fn get_history( + &self, + api_client: &mut FatcatApiClient, + limit: Option<u64>, + ) -> Result<Vec<fatcat_openapi::models::EntityHistoryEntry>> { + let limit: Option<i64> = limit.map(|v| v as i64); + use Specifier::*; + let ret: Result<Vec<fatcat_openapi::models::EntityHistoryEntry>> = match self { + Release(fcid) => match api_client + .rt + .block_on(api_client.api.get_release_history(fcid.to_string(), limit))? + { + fatcat_openapi::GetReleaseHistoryResponse::FoundEntityHistory(entries) => { + Ok(entries) + } + fatcat_openapi::GetReleaseHistoryResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + Work(fcid) => match api_client + .rt + .block_on(api_client.api.get_work_history(fcid.to_string(), limit))? + { + fatcat_openapi::GetWorkHistoryResponse::FoundEntityHistory(entries) => Ok(entries), + fatcat_openapi::GetWorkHistoryResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + Container(fcid) => match api_client.rt.block_on( + api_client + .api + .get_container_history(fcid.to_string(), limit), + )? 
{ + fatcat_openapi::GetContainerHistoryResponse::FoundEntityHistory(entries) => { + Ok(entries) + } + fatcat_openapi::GetContainerHistoryResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + Creator(fcid) => match api_client + .rt + .block_on(api_client.api.get_creator_history(fcid.to_string(), limit))? + { + fatcat_openapi::GetCreatorHistoryResponse::FoundEntityHistory(entries) => { + Ok(entries) + } + fatcat_openapi::GetCreatorHistoryResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + File(fcid) => match api_client + .rt + .block_on(api_client.api.get_file_history(fcid.to_string(), limit))? + { + fatcat_openapi::GetFileHistoryResponse::FoundEntityHistory(entries) => Ok(entries), + fatcat_openapi::GetFileHistoryResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + FileSet(fcid) => match api_client + .rt + .block_on(api_client.api.get_fileset_history(fcid.to_string(), limit))? + { + fatcat_openapi::GetFilesetHistoryResponse::FoundEntityHistory(entries) => { + Ok(entries) + } + fatcat_openapi::GetFilesetHistoryResponse::NotFound(err) => { + Err(anyhow!("Not Found: {}", err.message)) + } + resp => Err(anyhow!("{:?}", resp)) + .with_context(|| format!("API GET failed: {:?}", self)), + }, + WebCapture(fcid) => match api_client.rt.block_on( + api_client + .api + .get_webcapture_history(fcid.to_string(), limit), + )? 
{
                fatcat_openapi::GetWebcaptureHistoryResponse::FoundEntityHistory(entries) => {
                    Ok(entries)
                }
                fatcat_openapi::GetWebcaptureHistoryResponse::NotFound(err) => {
                    Err(anyhow!("Not Found: {}", err.message))
                }
                resp => Err(anyhow!("{:?}", resp))
                    .with_context(|| format!("API GET failed: {:?}", self)),
            },
            _ => Err(anyhow!("Don't know how to look up history for: {:?}", self)),
        };
        match ret {
            Ok(_) => ret,
            Err(_) => ret.with_context(|| format!("Failed to GET history: {:?}", self)),
        }
    }
}

impl FromStr for Specifier {
    type Err = anyhow::Error;

    /// Parses a textual specifier into a [`Specifier`].
    ///
    /// Three forms are recognized and tried in this order:
    ///
    /// 1. `<type>_<ident>`: `<type>` is one of `release`, `work`, `creator`,
    ///    `container`, `file`, `fileset`, `webcapture`, `editgroup` or
    ///    `editor`, and `<ident>` is exactly 26 characters drawn from `2-7`
    ///    and `a-z`.
    /// 2. `<key>:<value>`: `<key>` is one of `doi`, `pmcid`, `pmid`, `arxiv`,
    ///    `issnl`, `orcid`, `sha1`, `sha256`, `md5` or `username`, and
    ///    `<value>` is a non-empty run of non-whitespace characters. The value
    ///    is stored as given; it is not validated against the key's format.
    /// 3. `changelog_<digits>`: the digits are parsed as an `i64`.
    ///
    /// # Errors
    ///
    /// Returns an error when `s` matches none of the forms above, or when a
    /// changelog index cannot be represented as an `i64`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        lazy_static! {
            static ref SPEC_ENTITY_RE: Regex = Regex::new(r"^(release|work|creator|container|file|fileset|webcapture|editgroup|editor)_([2-7a-z]{26})$").unwrap();
            // `changelog` is intentionally not a lookup key: changelog entries
            // use the `changelog_<index>` form handled further down.
            static ref SPEC_LOOKUP_RE: Regex = Regex::new(
                r"^(doi|pmcid|pmid|arxiv|issnl|orcid|sha1|sha256|md5|username):(\S+)$"
            )
            .unwrap();
            static ref SPEC_CHANGELOG_RE: Regex = Regex::new(r"^changelog_(\d+)$").unwrap();
        }

        // first try simple entity prefixes
        if let Some(caps) = SPEC_ENTITY_RE.captures(s) {
            let fcid = caps[2].to_string();
            return match &caps[1] {
                "release" => Ok(Specifier::Release(fcid)),
                "work" => Ok(Specifier::Work(fcid)),
                "container" => Ok(Specifier::Container(fcid)),
                "creator" => Ok(Specifier::Creator(fcid)),
                "file" => Ok(Specifier::File(fcid)),
                "fileset" => Ok(Specifier::FileSet(fcid)),
                "webcapture" => Ok(Specifier::WebCapture(fcid)),
                "editgroup" => Ok(Specifier::Editgroup(fcid)),
                "editor" => Ok(Specifier::Editor(fcid)),
                // Only reachable if the regex and this match drift apart.
                other => Err(anyhow!("unexpected fatcat FCID type: {}", other)),
            };
        }

        // then try lookup prefixes
        if let Some(caps) = SPEC_LOOKUP_RE.captures(s) {
            let key = caps[2].to_string();
            return match &caps[1] {
                "doi" => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::DOI, key)),
                "pmcid" => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::PMCID, key)),
                "pmid" => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::PMID, key)),
                "arxiv" => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::Arxiv, key)),
                "issnl" => Ok(Specifier::ContainerLookup(
                    ContainerLookupKey::ISSNL,
                    key,
                )),
                "orcid" => Ok(Specifier::CreatorLookup(CreatorLookupKey::Orcid, key)),
                "sha1" => Ok(Specifier::FileLookup(FileLookupKey::SHA1, key)),
                "sha256" => Ok(Specifier::FileLookup(FileLookupKey::SHA256, key)),
                "md5" => Ok(Specifier::FileLookup(FileLookupKey::MD5, key)),
                "username" => Ok(Specifier::EditorUsername(key)),
                // Only reachable if the regex and this match drift apart.
                other => Err(anyhow!("unexpected entity lookup type: {}", other)),
            };
        }

        // lastly, changelog entity lookup
        if let Some(caps) = SPEC_CHANGELOG_RE.captures(s) {
            // The regex guarantees at least one digit, but an arbitrarily long
            // run of digits can still fail to fit in an i64; name the input in
            // the error so the user can tell what was rejected.
            let index = caps[1]
                .parse::<i64>()
                .map_err(|e| anyhow!("invalid changelog index '{}': {}", &caps[1], e))?;
            return Ok(Specifier::Changelog(index));
        }

        Err(anyhow!(
            "expecting a specifier: entity identifier or key/value lookup: {}",
            s
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_specifier_from_str() {
        // entity identifiers
        assert!(Specifier::from_str("release_asdf").is_err());
        assert!(Specifier::from_str("creator_IIMVC523XBHQLAV6J3SBTHUEHU").is_err());
        assert_eq!(
            Specifier::from_str("creator_iimvc523xbhqlav6j3sbthuehu").unwrap(),
            Specifier::Creator("iimvc523xbhqlav6j3sbthuehu".to_string())
        );
        assert_eq!(
            Specifier::from_str("fileset_iimvc523xbhqlav6j3sbthuehu").unwrap(),
            Specifier::FileSet("iimvc523xbhqlav6j3sbthuehu".to_string())
        );
        assert_eq!(
            Specifier::from_str("editgroup_iimvc523xbhqlav6j3sbthuehu").unwrap(),
            Specifier::Editgroup("iimvc523xbhqlav6j3sbthuehu".to_string())
        );

        // key/value lookups
        assert_eq!(
            Specifier::from_str("username:big-bot").unwrap(),
            Specifier::EditorUsername("big-bot".to_string())
        );
        assert_eq!(
            Specifier::from_str("doi:10.1234/a!s.df+-d").unwrap(),
            Specifier::ReleaseLookup(ReleaseLookupKey::DOI, "10.1234/a!s.df+-d".to_string())
        );
        assert_eq!(
            Specifier::from_str("issnl:1234-5678").unwrap(),
            Specifier::ContainerLookup(ContainerLookupKey::ISSNL, "1234-5678".to_string())
        );
        assert!(Specifier::from_str("doi:").is_err());
        assert!(Specifier::from_str("doi:10.1234/with space").is_err());

        // changelog entries
        assert_eq!(
            Specifier::from_str("changelog_1234").unwrap(),
            Specifier::Changelog(1234)
        );
        assert!(Specifier::from_str("changelog_12E4").is_err());
        assert!(Specifier::from_str("changelog_").is_err());
        // `changelog` is not a key/value lookup prefix
        assert!(Specifier::from_str("changelog:1234").is_err());
        // index does not fit in an i64
        assert!(Specifier::from_str("changelog_99999999999999999999999").is_err());

        // neither form
        assert!(Specifier::from_str("").is_err());
    }
}