From 8940330bf0472cdcfcf0219ca04f5d3320abe691 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sat, 13 Jun 2020 15:01:47 -0700 Subject: WIP fatcat-cli code from bnewbold-cli branch --- rust/fatcat-cli/Cargo.toml | 41 +++++ rust/fatcat-cli/TODO | 17 ++ rust/fatcat-cli/binary_size.md | 88 ++++++++++ rust/fatcat-cli/plan.txt | 116 +++++++++++++ rust/fatcat-cli/src/api.rs | 237 +++++++++++++++++++++++++ rust/fatcat-cli/src/entities.rs | 208 ++++++++++++++++++++++ rust/fatcat-cli/src/lib.rs | 222 ++++++++++++++++++++++++ rust/fatcat-cli/src/main.rs | 364 +++++++++++++++++++++++++++++++++++++++ rust/fatcat-cli/src/mutation.rs | 0 rust/fatcat-cli/src/search.rs | 172 ++++++++++++++++++ rust/fatcat-cli/src/specifier.rs | 282 ++++++++++++++++++++++++++++++ 11 files changed, 1747 insertions(+) create mode 100644 rust/fatcat-cli/Cargo.toml create mode 100644 rust/fatcat-cli/TODO create mode 100644 rust/fatcat-cli/binary_size.md create mode 100644 rust/fatcat-cli/plan.txt create mode 100644 rust/fatcat-cli/src/api.rs create mode 100644 rust/fatcat-cli/src/entities.rs create mode 100644 rust/fatcat-cli/src/lib.rs create mode 100644 rust/fatcat-cli/src/main.rs create mode 100644 rust/fatcat-cli/src/mutation.rs create mode 100644 rust/fatcat-cli/src/search.rs create mode 100644 rust/fatcat-cli/src/specifier.rs diff --git a/rust/fatcat-cli/Cargo.toml b/rust/fatcat-cli/Cargo.toml new file mode 100644 index 0000000..09dd492 --- /dev/null +++ b/rust/fatcat-cli/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "fatcat-cli" +version = "0.3.0-dev" +edition = "2018" +authors = ["Bryan Newbold "] +license = "AGPL-3+" +description = "CLI tool for fatcat.wiki, an open digital catalog of research papers" +readme = "README_cli.md" +homepage = "https://fatcat.wiki" +repository = "https://github.com/internetarchive/fatcat" +keywords = ["cli", "fatcat", "digital-library"] +categories = ["command-line-utilities"] + + +[dependencies] +data-encoding = "2.1" +fatcat-openapi = { version = "*", path = 
"../fatcat-openapi", default-features = false, features = ["client"] } +macaroon = { git = "https://github.com/bnewbold/libmacaroon-rs", branch = "bnewbold-broken" } +toml = "0.5" +termcolor = "1" +atty = "0.2" +tabwriter = "1.2" +#human-panic = "1" +structopt = "0.3" +swagger = "4" +hyper = "0.12" +tokio = "0.1.17" +serde_json = "1.0" +anyhow = "1.0" +log = "0.4" +env_logger = "0.7" +regex = { version = "1.3", default-features = false, features = ["perf-literal"] } +lazy_static = "1" +serde = "1.0" +reqwest = "0.9" +chrono-humanize = "*" +tempfile = "3" + + +[dev-dependencies] +assert_cmd = "1" diff --git a/rust/fatcat-cli/TODO b/rust/fatcat-cli/TODO new file mode 100644 index 0000000..5e9149b --- /dev/null +++ b/rust/fatcat-cli/TODO @@ -0,0 +1,17 @@ + +x type/struct to wrap api_client and runtime +x trait +x parse "specifier" +- API auth (token, context) +- "status" command + => auth_check + => build a struct with serde_json+prettyprint +- switch from failure to anyhow = "1.0" +- editgroup creation + +- updating using CLI specifiers +- edit command +- multiple files? 
+ +Random old command: + OPENSSL_LIB_DIR="/usr/lib/x86_64-linux-gnu" OPENSSL_INCLUDE_DIR="/usr/include/openssl" cargo build diff --git a/rust/fatcat-cli/binary_size.md b/rust/fatcat-cli/binary_size.md new file mode 100644 index 0000000..a79cf9b --- /dev/null +++ b/rust/fatcat-cli/binary_size.md @@ -0,0 +1,88 @@ + +## Binary Size + +As of 2020-05-24, in early development, the relative binary sizes are: + + 121 MB default debug build + 12 MB default release build + 8.2 MB release build w/ LTO + 6.6 MB release build w/ LTO, stripped + +After some small changes: + + 5.9 MB release build w/ LTO, size optimization, other flags + 4.1 MB release build w/ LTO, size optimization, other flags, stripped + +Replacing reqwest with minreq: + + 6.3 MB release build w/ LTO, size optimization, other flags + 4.1 MB release build w/ LTO, size optimization, other flags, stripped + + (so, not worth it, at least while using fatcat_openapi with hyper+tokio) + +Note that release builds with LTO take *quite* a long time (many minutes). We +probably don't want that to be the default for `fatcatd` builds. + + cargo bloat --release --crates + + File .text Size Crate + 12.2% 21.4% 1021.5KiB fatcat_cli + 7.1% 12.5% 596.7KiB fatcat_openapi + 6.3% 11.1% 529.6KiB reqwest + 6.2% 10.9% 518.5KiB std + 3.5% 6.1% 290.3KiB clap + 2.5% 4.3% 205.9KiB regex + 2.4% 4.2% 198.7KiB regex_syntax + 2.1% 3.6% 172.8KiB h2 + 1.9% 3.4% 162.7KiB hyper + 1.8% 3.1% 149.9KiB futures + 1.4% 2.4% 116.9KiB serde_json + 1.3% 2.3% 111.2KiB macaroon + 1.0% 1.8% 85.3KiB unicode_normalization + 0.7% 1.3% 62.4KiB http + 0.6% 1.0% 50.1KiB serde + 0.6% 1.0% 47.5KiB url + 0.5% 0.9% 41.9KiB [Unknown] + 0.4% 0.8% 36.5KiB tokio_reactor + 0.4% 0.7% 31.8KiB env_logger + 0.3% 0.6% 26.6KiB chrono + 3.4% 5.9% 283.3KiB And 57 more crates. Use -n N to show more. 
+ 57.2% 100.0% 4.7MiB .text section size, the file size is 8.2MiB + + + bnewbold@orithena$ cargo bloat --release + Finished release [optimized] target(s) in 0.27s + Analyzing target/release/fatcat-cli + + File .text Size Crate Name + 0.4% 1.0% 53.2KiB regex ::capture... + 0.4% 0.8% 44.1KiB regex_syntax regex_syntax::ast::parse::ParserI

::parse_with_comments + 0.3% 0.7% 36.8KiB unicode_normalization unicode_normalization::tables::compatibility_fully_decomposed + 0.3% 0.6% 30.3KiB unicode_normalization unicode_normalization::tables::canonical_fully_decomposed + 0.2% 0.5% 25.2KiB data_encoding data_encoding::Encoding::decode_mut + 0.2% 0.5% 24.0KiB fatcat_openapi? as serde::de::Deserializer>::deser... + 0.1% 0.3% 16.4KiB fatcat_cli <&mut serde_json::de::Deserializer as serde::de::Deserializer>::deser... + 0.1% 0.3% 16.2KiB fatcat_cli <&mut serde_json::de::Deserializer as serde::de::Deserializer>::deser... + 0.1% 0.3% 16.1KiB fatcat_cli fatcat_cli::run + 0.1% 0.3% 15.2KiB fatcat_cli <&mut serde_json::de::Deserializer as serde::de::Deserializer>::deser... + 0.1% 0.3% 14.3KiB serde_json? <&mut serde_json::de::Deserializer as serde::de::Deserializer>::deser... + 0.1% 0.3% 14.2KiB fatcat_cli <&mut serde_json::de::Deserializer as serde::de::Deserializer>::deser... + 0.1% 0.3% 14.0KiB regex regex::exec::ExecBuilder::build + 0.1% 0.3% 13.8KiB unicode_normalization unicode_normalization::tables::composition_table + 0.1% 0.3% 13.6KiB fatcat_cli <&mut serde_json::de::Deserializer as serde::de::Deserializer>::deser... + 38.6% 89.5% 4.5MiB And 13832 smaller methods. Use -n N to show more. + 43.1% 100.0% 5.1MiB .text section size, the file size is 11.8MiB + +Low hanging fruit includes: + +- reviewing features for reqwest, clap, regex, fatcat_openapi +- replace reqwest with something smaller +- use `ansi-term` (already part of clap) +- consider removing fancy clap features? 
meh +- look at graph; probably duplicate versions of things + diff --git a/rust/fatcat-cli/plan.txt b/rust/fatcat-cli/plan.txt new file mode 100644 index 0000000..0b2c3a7 --- /dev/null +++ b/rust/fatcat-cli/plan.txt @@ -0,0 +1,116 @@ + +x search release, query string, limit, dumping search doc JSON +x search release, query string, limit, fetching API for each +x search release, query string, scroll API, fetching API for each + +x handle stdout terminated + +x editgroup creation + => set agent +x editgroup accept +x editgroup submit +x editgroup list + +x release create from json/TOML, to an editgroup +x release delete, to an editgroup +x release update from full json/TOML to API +x release edit (using $EDITOR, temp file) + +- -n/--num for limits +- -o/--output and -i/--input for format/schema selection (including 'es-json') +- parse editgroup specifier + => "auto": fetch from recent; default? + => "new": create + => editgroup_blah or blah +- release update fields and submit to editgroup + => more fields, at least 10 +- implement "delete from editgroup" for creation, updates, edit + => fetch editgroup helper + => helper function that takes editgroup (model) and expanded specifier; deletes existing edit from editgroup if necessary + => skip this codepath for "new" and batch creation +- implement @-syntax for create/update +- later: some variant of @-syntax for stream of multiple updates/creations? +- editgroup creation outputs just editgroup on stdout (unless output type selected), plus "success" to stderr + +- download single file: + => try archive.org files, then wayback, then original URLs + => download to current directory as {sha1hex}.pdf.partial, then atomic move on success +- syntect coloring of output for stdout +- expand/hide flags for get, search +- search/update/etc containers (and files?) 
+ +- polish and test so actually usable for release edits from search + => manpage + => bash completion + => .deb generation + => consider moving to new repo, with copy of fatcat-openapi-client + => cross build for OS X? homebrew? + +- search release, filters, scroll API, fetching API for each + => structopt parses: query, filter, anti-filter +- search release, filters, scroll API, fetching API for each, verifying revision and filters for each +- optional directory structure: {dir}/{hex}/{hex}/{sha1hex}.pdf +- parallelism of downloads + +- history for all entity types + => pretty table, json optional +- get revisions for all entity types + +- fcid/UUID helper + +## Design Decisions + +- batch/multi behavior for mutations + => need some option to do auto-accept batches +- updates and create, from-file vs. args + => basically, could be any of specifier, input_file, mutations supplied on command-line + => could use httpie @file.blah syntax to load entire file + => "edit" as an option for reading single files from disk? meh + proposal: + create + either reads a file from path/stdin, or has mutation args + optionally --new-editgroup + create-multi + reads multiple JSON from file or stdin + optionally --auto-batch in chunks + optionally --new-editgroup + update + takes a specifier + either reads a file from path/stdin, or has mutation args + update-multi + reads multiple JSON from file or stdin + creates new editgroup? + edit + delete + delete-batch + reads multiple entities from stdin + + --skip-check controls whether to do a GET and validate mutations + => eg, don't update if equal +- holding state about current editgroup + => env var, with helpful output to show how to export + => spawn sub-shell with FATCAT_EDITGROUP set + => state in a config file somewhere (user homedir?) 
+ => "smart" select most recent fatcat-cli editgroup from editor's list +- release revision checking on updates + => could re-fetch and check rev and/or mutations against current before making edit +- delete edit from editgroup + +## Rust refactors + +In rust code, all entity responses could have trait object implementations, +which would transform to either returning the entity (trait object) or error. + +## API refactors + +Could significantly reduce number of response types and endpoints by making +many methods generic (same endpoint URL, but entity type as a keyword): + +- entity history +- delete +- get edit + +Should allow destructive updates in editgroups with "clobber" flag. In +implementation, could either delete first or on conflict do upsert. + +More consistent use of generic success/error? diff --git a/rust/fatcat-cli/src/api.rs b/rust/fatcat-cli/src/api.rs new file mode 100644 index 0000000..e8b9e79 --- /dev/null +++ b/rust/fatcat-cli/src/api.rs @@ -0,0 +1,237 @@ + +use log::{self,info,debug}; +use hyper::client::ResponseFuture; +use fatcat_openapi; +use fatcat_openapi::{ApiNoContext, ApiError, ContextWrapperExt}; +use fatcat_openapi::client::Client; +use fatcat_openapi::models; +use swagger::{AuthData, ContextBuilder, EmptyContext, Push, XSpanIdString, auth}; +use anyhow::{Result, anyhow, Context}; +use crate::{ClientStatus,parse_macaroon_editor_id,Specifier, EntityType}; +use tokio::runtime::current_thread::Runtime; + + +pub struct FatcatApiClient<'a> { + pub api: fatcat_openapi::ContextWrapper<'a, Client, swagger::make_context_ty!( ContextBuilder, EmptyContext, Option, XSpanIdString)>, + pub rt: tokio::runtime::current_thread::Runtime, + api_token: Option, + api_host: String, + pub editor_id: Option, +} + +impl<'a> FatcatApiClient<'a> { + + pub fn new(client: &'a fatcat_openapi::client::Client, api_host: String, api_token: Option) -> Result { + + let auth_data = match api_token { + Some(ref token) => Some(AuthData::Bearer(auth::Bearer{ token: 
token.clone() })), + None => None, + }; + //info!("{:?}", auth_data); + let context: swagger::make_context_ty!( + ContextBuilder, + EmptyContext, + Option, + XSpanIdString + ) = swagger::make_context!( + ContextBuilder, + EmptyContext, + auth_data, + XSpanIdString::default() + ); + + let wrapped_client: fatcat_openapi::ContextWrapper, swagger::make_context_ty!( + ContextBuilder, + EmptyContext, + Option, + XSpanIdString + )> = client.with_context(context); + let rt: Runtime = Runtime::new().expect("create tokio runtime"); + + let editor_id = match api_token { + Some(ref token) => Some(parse_macaroon_editor_id(token).context("parse API auth token")?), + None => None, + }; + + Ok(FatcatApiClient { + api: wrapped_client, + rt, + api_token: api_token, + editor_id, + api_host, + }) + } + + pub fn status(&mut self) -> Result { + let last_changelog = match self.rt.block_on(self.api.get_changelog(Some(1))) { + Ok(fatcat_openapi::GetChangelogResponse::Success(entry_vec)) => Some(entry_vec[0].index), + Ok(_) | Err(_) => None, + }; + let has_api_token = self.api_token.is_some(); + let account: Option = if has_api_token && last_changelog.is_some() { + match self.rt.block_on(self.api.auth_check(None)).context("check auth token")? { + fatcat_openapi::AuthCheckResponse::Success(_) => Ok(()), + fatcat_openapi::AuthCheckResponse::Forbidden(err) => Err(anyhow!("Forbidden ({}): {}", err.error, err.message)), + fatcat_openapi::AuthCheckResponse::NotAuthorized{body: err, ..} => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("auth check failed"))?, + }.context("check auth token")?; + match self.rt.block_on(self.api.get_editor(self.editor_id.as_ref().unwrap().to_string())).context("fetching editor account info")? 
{ + fatcat_openapi::GetEditorResponse::Found(editor) => Some(editor), + fatcat_openapi::GetEditorResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message))?, + resp => Err(anyhow!("{:?}", resp)).context(format!("editor fetch failed"))?, + } + } else { + None + }; + Ok(ClientStatus { + api_host: self.api_host.clone(), + has_api_token, + last_changelog, + account, + }) + } + + pub fn update_editgroup_submit(&mut self, editgroup_id: String, submit: bool) -> Result { + let result = self.rt.block_on( + self.api.get_editgroup(editgroup_id.clone()) + ).context("fetch editgroups")?; + let eg = match result { + fatcat_openapi::GetEditgroupResponse::Found(eg) => eg, + other => Err(anyhow!("{:?}", other)) + .context(format!("failed to fetch editgroup {}", editgroup_id))?, + }; + let result = self.rt.block_on( + self.api.update_editgroup(editgroup_id.clone(), eg, Some(submit)) + ).context("submit editgroup")?; + match result { + fatcat_openapi::UpdateEditgroupResponse::UpdatedEditgroup(eg) => Ok(eg), + other => Err(anyhow!("{:?}", other)) + .context(format!("failed to submit editgroup {}", editgroup_id))?, + } + } + + pub fn delete_entity(&mut self, specifier: Specifier, editgroup_id: String) -> Result { + use Specifier::*; + let specifier = specifier.into_entity_specifier(self)?; + match specifier.clone() { + Release(fcid) => match self.rt.block_on(self.api.delete_release(editgroup_id, fcid))? { + fatcat_openapi::DeleteReleaseResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Work(fcid) => match self.rt.block_on(self.api.delete_work(editgroup_id, fcid))? { + fatcat_openapi::DeleteWorkResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Container(fcid) => match self.rt.block_on(self.api.delete_container(editgroup_id, fcid))? 
{ + fatcat_openapi::DeleteContainerResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Creator(fcid) => match self.rt.block_on(self.api.delete_creator(editgroup_id, fcid))? { + fatcat_openapi::DeleteCreatorResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + File(fcid) => match self.rt.block_on(self.api.delete_file(editgroup_id, fcid))? { + fatcat_openapi::DeleteFileResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + FileSet(fcid) => match self.rt.block_on(self.api.delete_fileset(editgroup_id, fcid))? { + fatcat_openapi::DeleteFilesetResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + WebCapture(fcid) => match self.rt.block_on(self.api.delete_webcapture(editgroup_id, fcid))? { + fatcat_openapi::DeleteWebcaptureResponse::DeletedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Editgroup(..) | Editor(..) => unimplemented!("deletion for this entity type"), + Changelog(..) => Err(anyhow!("mutating this entity type doesn't make sense"))?, + EditorUsername(..) | ReleaseLookup(..) | ContainerLookup(..) | FileLookup(..) | CreatorLookup(..) => + Err(anyhow!("into_entity_specifier() didn't work?"))?, + }.context(format!("failed to delete {:?}", specifier)) + } + + pub fn create_entity_from_json(&mut self, entity_type: EntityType, json_str: &str, editgroup_id: String) -> Result { + match entity_type { + EntityType::Release => { + match self.rt.block_on(self.api.create_release(editgroup_id, serde_json::from_str(&json_str)?))? { + fatcat_openapi::CreateReleaseResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + }, + EntityType::Work => { + match self.rt.block_on(self.api.create_work(editgroup_id, serde_json::from_str(&json_str)?))? 
{ + fatcat_openapi::CreateWorkResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + }, + EntityType::Creator => { + match self.rt.block_on(self.api.create_creator(editgroup_id, serde_json::from_str(&json_str)?))? { + fatcat_openapi::CreateCreatorResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + }, + EntityType::Container => { + match self.rt.block_on(self.api.create_container(editgroup_id, serde_json::from_str(&json_str)?))? { + fatcat_openapi::CreateContainerResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + }, + EntityType::File => { + match self.rt.block_on(self.api.create_file(editgroup_id, serde_json::from_str(&json_str)?))? { + fatcat_openapi::CreateFileResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + }, + EntityType::FileSet => { + match self.rt.block_on(self.api.create_fileset(editgroup_id, serde_json::from_str(&json_str)?))? { + fatcat_openapi::CreateFilesetResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + }, + EntityType::WebCapture => { + match self.rt.block_on(self.api.create_webcapture(editgroup_id, serde_json::from_str(&json_str)?))? { + fatcat_openapi::CreateWebcaptureResponse::CreatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + } + }, + }.context(format!("parsing and creating {:?} entity", entity_type)) + } + + pub fn update_entity_from_json(&mut self, specifier: Specifier, json_str: &str, editgroup_id: String) -> Result { + use Specifier::*; + let specifier = specifier.into_entity_specifier(self)?; + match specifier.clone() { + Release(fcid) => match self.rt.block_on(self.api.update_release(editgroup_id, fcid, serde_json::from_str(&json_str)?))? 
{ + fatcat_openapi::UpdateReleaseResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Work(fcid) => match self.rt.block_on(self.api.update_work(editgroup_id, fcid, serde_json::from_str(&json_str)?))? { + fatcat_openapi::UpdateWorkResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Container(fcid) => match self.rt.block_on(self.api.update_container(editgroup_id, fcid, serde_json::from_str(&json_str)?))? { + fatcat_openapi::UpdateContainerResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Creator(fcid) => match self.rt.block_on(self.api.update_creator(editgroup_id, fcid, serde_json::from_str(&json_str)?))? { + fatcat_openapi::UpdateCreatorResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + File(fcid) => match self.rt.block_on(self.api.update_file(editgroup_id, fcid, serde_json::from_str(&json_str)?))? { + fatcat_openapi::UpdateFileResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + FileSet(fcid) => match self.rt.block_on(self.api.update_fileset(editgroup_id, fcid, serde_json::from_str(&json_str)?))? { + fatcat_openapi::UpdateFilesetResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + WebCapture(fcid) => match self.rt.block_on(self.api.update_webcapture(editgroup_id, fcid, serde_json::from_str(&json_str)?))? { + fatcat_openapi::UpdateWebcaptureResponse::UpdatedEntity(ee) => Ok(ee), + other => Err(anyhow!("{:?}", other)), + }, + Editgroup(..) | Editor(..) => unimplemented!("updates for this entity type"), + Changelog(..) => Err(anyhow!("deleting this entity type doesn't make sense"))?, + EditorUsername(..) | ReleaseLookup(..) | ContainerLookup(..) | FileLookup(..) | CreatorLookup(..) 
=> + Err(anyhow!("into_entity_specifier() didn't work?"))?, + }.context(format!("failed to update {:?}", specifier)) + } +} diff --git a/rust/fatcat-cli/src/entities.rs b/rust/fatcat-cli/src/entities.rs new file mode 100644 index 0000000..907061c --- /dev/null +++ b/rust/fatcat-cli/src/entities.rs @@ -0,0 +1,208 @@ + +use std::str::FromStr; +use anyhow::{Result, anyhow, Context}; +use lazy_static::lazy_static; +use regex::Regex; +use toml; +use serde_json; +use serde; +use fatcat_openapi::models; +use crate::Specifier; + + +#[derive(Debug, PartialEq, Clone)] +pub struct Mutation { + field: String, + value: Option, +} + +impl FromStr for Mutation { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + // first try simple entity prefixes + lazy_static! { + static ref MUTATE_ENTITY_RE: Regex = Regex::new(r"^([a-z_]+)=(.*)$").unwrap(); + } + if let Some(captures) = MUTATE_ENTITY_RE.captures(s) { + // XXX: Some() vs None for value + return Ok(Mutation { + field: captures[1].to_string(), + value: match &captures[2] { + "" => None, + val => Some(val.to_string()), + }, + }); + } + Err(anyhow!("not a field mutation: {}", s)) + } +} + +/* + * Goal is to have traits around API entities. 
Things we'll want to do on concrete entities: + * + * - print, or pretty-print, as JSON or TOML + * - get fcid (or, self-specifier) + * - update (mutate or return copy) fields based on parameters + * - update self to remote API + * + * Methods that might return trait objects: + * + * - get by specifier + */ + +pub trait ApiEntityModel: ApiModelSer+ApiModelIdent+ApiModelMutate { +} + +impl ApiEntityModel for models::ReleaseEntity {} +impl ApiEntityModel for models::ContainerEntity {} +impl ApiEntityModel for models::CreatorEntity {} +impl ApiEntityModel for models::WorkEntity {} +impl ApiEntityModel for models::FileEntity {} +impl ApiEntityModel for models::FilesetEntity {} +impl ApiEntityModel for models::WebcaptureEntity {} +impl ApiEntityModel for models::Editor{} +impl ApiEntityModel for models::Editgroup{} +impl ApiEntityModel for models::ChangelogEntry{} + +pub trait ApiModelSer { + fn to_json_string(&self) -> Result; + fn to_toml_string(&self) -> Result; +} + +impl ApiModelSer for T { + + fn to_json_string(&self) -> Result { + Ok(serde_json::to_string(self)?) + } + + fn to_toml_string(&self) -> Result { + Ok(toml::Value::try_from(self)?.to_string()) + } +} + +pub trait ApiModelIdent { + fn specifier(&self) -> Specifier; +} + +macro_rules! 
generic_entity_specifier { + ($specifier_type:ident) => { + fn specifier(&self) -> Specifier { + if let Some(fcid) = &self.ident { Specifier::$specifier_type(fcid.to_string()) } else { panic!("expected full entity") } + } + } +} + +impl ApiModelIdent for models::ReleaseEntity { generic_entity_specifier!(Release); } +impl ApiModelIdent for models::ContainerEntity { generic_entity_specifier!(Container); } +impl ApiModelIdent for models::CreatorEntity { generic_entity_specifier!(Creator); } +impl ApiModelIdent for models::WorkEntity { generic_entity_specifier!(Work); } +impl ApiModelIdent for models::FileEntity { generic_entity_specifier!(File); } +impl ApiModelIdent for models::FilesetEntity { generic_entity_specifier!(FileSet); } +impl ApiModelIdent for models::WebcaptureEntity { generic_entity_specifier!(WebCapture); } + +impl ApiModelIdent for models::ChangelogEntry{ + fn specifier(&self) -> Specifier { + Specifier::Changelog(self.index) + } +} + +impl ApiModelIdent for models::Editgroup { + fn specifier(&self) -> Specifier { + if let Some(fcid) = &self.editgroup_id { Specifier::Editgroup(fcid.to_string()) } else { panic!("expected full entity") } + } +} + +impl ApiModelIdent for models::Editor { + fn specifier(&self) -> Specifier { + if let Some(fcid) = &self.editor_id { Specifier::Editor(fcid.to_string()) } else { panic!("expected full entity") } + } +} + +pub trait ApiModelMutate { + fn mutate(&mut self, mutations: Vec) -> Result<()>; +} + +impl ApiModelMutate for models::ReleaseEntity { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + for m in mutations { + match (m.field.as_str(), m.value) { + ("title", val) => { self.title = val; }, + ("subtitle", val) => { self.subtitle = val; }, + (field, _) => unimplemented!("setting field {} on a release", field), + } + } + Ok(()) + } +} + +impl ApiModelMutate for models::ContainerEntity { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for 
models::CreatorEntity { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::WorkEntity { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::FileEntity { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::FilesetEntity { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::WebcaptureEntity { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::Editor { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::Editgroup { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +impl ApiModelMutate for models::ChangelogEntry { + fn mutate(&mut self, mutations: Vec) -> Result<()> { + unimplemented!("mutations") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mutation_from_str() -> () { + assert!(Mutation::from_str("release_asdf").is_err()); + assert_eq!(Mutation::from_str("title=blah").unwrap(), + Mutation { field: "title".to_string(), value: Some("blah".to_string()) }); + assert_eq!(Mutation::from_str("title=").unwrap(), + Mutation { field: "title".to_string(), value: None }); + assert_eq!(Mutation::from_str("title=string with spaces and stuff").unwrap(), + Mutation { field: "title".to_string(), value: Some("string with spaces and stuff".to_string()) }); + } + +} diff --git a/rust/fatcat-cli/src/lib.rs b/rust/fatcat-cli/src/lib.rs new file mode 100644 index 0000000..defa18a --- /dev/null +++ b/rust/fatcat-cli/src/lib.rs @@ -0,0 +1,222 @@ + +use std::io::Read; +use std::path::PathBuf; +use std::io::BufRead; +use tabwriter::TabWriter; +use chrono_humanize::HumanTime; +use 
anyhow::{Result, anyhow, Context}; +use std::io::Write; +use atty; +use termcolor::{ColorChoice, StandardStream, Color, ColorSpec, WriteColor}; +use data_encoding::BASE64; +use macaroon::{Macaroon, Verifier}; +use fatcat_openapi; +use fatcat_openapi::models; +#[allow(unused_imports)] +use log::{self,info,debug}; +use std::str::FromStr; + +mod entities; +mod search; +mod specifier; +mod api; + +pub use entities::{ApiEntityModel,ApiModelSer,ApiModelIdent,Mutation}; +pub use specifier::Specifier; +pub use api::FatcatApiClient; +pub use search::crude_search; + +// Want to show: +// - whether api_token found +// - configured api_host we are connecting to +// - whether we can connect to remote host (eg, get most recent changelog) +// - whether our auth is valid +// - current active editgroup +#[derive(Debug, PartialEq, Clone, serde::Serialize)] +pub struct ClientStatus { + pub has_api_token: bool, + pub api_host: String, + pub last_changelog: Option, + pub account: Option, +} + +impl ClientStatus { + + pub fn pretty_print(self) -> Result<()> { + + let mut color_stdout = StandardStream::stdout( + if atty::is(atty::Stream::Stdout) { + ColorChoice::Auto + } else { + ColorChoice::Never + } + ); + let color_normal = ColorSpec::new(); + let mut color_bold = ColorSpec::new(); + color_bold.set_bold(true); + let mut color_happy = ColorSpec::new(); + color_happy.set_fg(Some(Color::Green)).set_bold(true); + let mut color_sad = ColorSpec::new(); + color_sad.set_fg(Some(Color::Red)).set_bold(true); + + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "API host")?; + color_stdout.set_color(&color_bold)?; + write!(&mut color_stdout, "{}", self.api_host)?; + match self.last_changelog { + Some(index) => { + color_stdout.set_color(&color_happy)?; + writeln!(&mut color_stdout, " [successfully connected]")?; + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "Last changelog")?; + color_stdout.set_color(&color_bold)?; + 
writeln!(&mut color_stdout, "{}", index)?; + }, + None => { + color_stdout.set_color(&color_sad)?; + writeln!(&mut color_stdout, " [Failed to connect]")?; + } + }; + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "API auth token")?; + match self.has_api_token { + true => { + color_stdout.set_color(&color_happy)?; + writeln!(&mut color_stdout, "[configured]")?; + }, + false => { + color_stdout.set_color(&color_sad)?; + writeln!(&mut color_stdout, "[not configured]")?; + }, + }; + if let Some(editor) = self.account { + color_stdout.set_color(&color_normal)?; + write!(&mut color_stdout, "{:>16}: ", "Account")?; + color_stdout.set_color(&color_bold)?; + write!(&mut color_stdout, "{}", editor.username)?; + if editor.is_bot == Some(true) { + color_stdout.set_color(ColorSpec::new().set_fg(Some(Color::Blue)).set_bold(true))?; + write!(&mut color_stdout, " [bot]")?; + } + if editor.is_admin == Some(true) { + color_stdout.set_color(ColorSpec::new().set_fg(Some(Color::Magenta)).set_bold(true))?; + write!(&mut color_stdout, " [admin]")?; + } + match editor.is_active { + Some(true) => { + color_stdout.set_color(&color_happy)?; + writeln!(&mut color_stdout, " [active]")?; + }, + Some(false) | None => { + color_stdout.set_color(&color_sad)?; + writeln!(&mut color_stdout, " [disabled]")?; + }, + }; + color_stdout.set_color(&color_normal)?; + writeln!(&mut color_stdout, "{:>16} editor_{}", "", editor.editor_id.unwrap())?; + }; + color_stdout.set_color(&color_normal)?; + Ok(()) + } +} + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum EntityType { + Release, + Work, + Container, + Creator, + File, + FileSet, + WebCapture, +} + +impl FromStr for EntityType { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s { + "release" => Ok(EntityType::Release), + "work" => Ok(EntityType::Work), + "container" => Ok(EntityType::Container), + "creator" => Ok(EntityType::Creator), + "file" => Ok(EntityType::File), + "fileset" => 
Ok(EntityType::FileSet), + "webcapture" => Ok(EntityType::WebCapture), + _ => Err(anyhow!("invalid entity type : {}", s)), + } + } +} + +/// Takes a macaroon token (as base64-encoded string) and tries to parse out an editor id +pub fn parse_macaroon_editor_id(s: &str) -> Result { + let raw = BASE64.decode(s.as_bytes()).context("macaroon parsing failed")?; + let mac = Macaroon::deserialize(&raw).map_err(|err| anyhow!("macaroon deserialization failed: {:?}", err))?; + let mac = mac.validate().map_err(|err| anyhow!("macaroon validation failed: {:?}", err))?; + let mut verifier = Verifier::new(); + let mut editor_id: Option = None; + for caveat in mac.first_party_caveats() { + if caveat.predicate().starts_with("editor_id = ") { + editor_id = Some(caveat.predicate().get(12..).context("parsing macaroon")?.to_string()); + break; + } + } + let editor_id = match editor_id { + Some(id) => id, + None => Err(anyhow!("expected an editor_id caveat in macaroon token"))?, + }; + verifier.satisfy_exact(&format!("editor_id = {}", editor_id.to_string())); + Ok(editor_id) +} + +pub fn print_editgroups(eg_list: Vec, json: bool) -> Result<()> { + if json { + for eg in eg_list { + writeln!(&mut std::io::stdout(), "{}", eg.to_json_string()?)?; + } + } else { + let mut tw = TabWriter::new(std::io::stdout()); + writeln!(tw, "editgroup_id\tchangelog_index\tcreated\tsubmitted\tdescription")?; + for eg in eg_list { + writeln!(tw, "{}\t{}\t{}\t{}\t{}", + eg.editgroup_id.unwrap(), + eg.changelog_index.map_or("-".to_string(), |v| v.to_string()), + eg.created.map_or("-".to_string(), |v| HumanTime::from(v).to_string()), + eg.submitted.map_or("-".to_string(), |v| HumanTime::from(v).to_string()), + eg.description.unwrap_or("-".to_string()))?; + } + tw.flush()?; + } + Ok(()) +} + +pub fn read_entity_file(input_path: Option) -> Result { + // treat "-" as "use stdin" + let input_path = match input_path { + Some(s) if s.to_string_lossy() == "-" => None, + _ => input_path, + }; + match input_path { + None 
=> { + let mut line = String::new(); + std::io::stdin().read_line(&mut line)?; + Ok(line) + }, + Some(path) if path.extension().map(|v| v.to_str()) == Some(Some("toml")) => { + info!("reading {:?} as TOML", path); + // as a hack, read TOML but then serialize it back to JSON + let mut contents = String::new(); + let mut input_file = std::fs::File::open(path).context("reading entity from TOML file")?; + input_file.read_to_string(&mut contents)?; + let value: toml::Value = contents.parse().context("parsing TOML file")?; + Ok(serde_json::to_string(&value)?) + }, + Some(path) => { + let mut line = String::new(); + let input_file = std::fs::File::open(path)?; + let mut buffered = std::io::BufReader::new(input_file); + buffered.read_line(&mut line)?; + Ok(line) + }, + } +} diff --git a/rust/fatcat-cli/src/main.rs b/rust/fatcat-cli/src/main.rs new file mode 100644 index 0000000..3ce46d1 --- /dev/null +++ b/rust/fatcat-cli/src/main.rs @@ -0,0 +1,364 @@ + +use std::path::PathBuf; +use fatcat_cli::ApiModelSer; +use atty; +use std::io::Write; +use termcolor::{ColorChoice, StandardStream, Color, ColorSpec, WriteColor}; +use anyhow::{Result, Context, anyhow}; +#[allow(unused_imports)] +use log::{self,info,debug}; +use structopt::StructOpt; +use fatcat_cli::*; +use fatcat_openapi::{client, models, ApiNoContext}; + + +#[derive(StructOpt)] +#[structopt(rename_all = "kebab-case", about = "CLI interface to Fatcat API" )] +struct Opt { + + #[structopt(long = "--api-host", env = "FATCAT_API_HOST", default_value = "https://api.fatcat.wiki")] + api_host: String, + + #[structopt(long = "--api-token", env = "FATCAT_API_AUTH_TOKEN", hide_env_values = true)] + api_token: Option, + + #[structopt(long = "--search-host", env = "FATCAT_SEARCH_HOST", default_value = "https://search.fatcat.wiki")] + search_host: String, + + /// Pass many times for more log output + /// + /// By default, it'll only report errors. 
Passing `-v` one time also prints + /// warnings, `-vv` enables info logging, `-vvv` debug, and `-vvvv` trace. + #[structopt(long, short = "v", parse(from_occurrences))] + verbose: i8, + + #[structopt(subcommand)] + cmd: Command, +} + +#[derive(StructOpt)] +enum EditgroupCommand { + Create { + #[structopt(long, short)] + description: String, + }, + List { + #[structopt(long = "--editor-id", short)] + editor_id: Option, + + #[structopt(long, short, default_value = "20")] + limit: i64, + + #[structopt(long)] + json: bool, + }, + Reviewable { + #[structopt(long, short, default_value = "20")] + limit: i64, + + #[structopt(long)] + json: bool, + }, + Accept { + #[structopt(env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, + Submit { + #[structopt(env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, + Unsubmit { + #[structopt(env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, +} + +#[derive(StructOpt)] +enum Command { + Status { + #[structopt(long)] + json: bool, + }, + Get { + specifier: Specifier, + + #[structopt(long)] + toml: bool, + }, + Create { + entity_type: EntityType, + + /// Input file, "-" for stdin. + #[structopt(long = "--file", short = "-f", parse(from_os_str))] + input_path: Option, + + #[structopt(long = "--editgroup-id", short, env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, + Update { + specifier: Specifier, + + /// Input file, "-" for stdin. 
+ #[structopt(long = "--file", short = "-f", parse(from_os_str))] + input_path: Option, + + #[structopt(long = "--editgroup-id", short, env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + + mutations: Vec, + }, + Edit { + specifier: Specifier, + + #[structopt(long = "--editgroup-id", short, env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + + #[structopt(long)] + json: bool, + + #[structopt(long = "--editing-command", env = "EDITOR")] + editing_command: String, + }, + Delete { + specifier: Specifier, + + #[structopt(long = "--editgroup-id", short, env = "FATCAT_EDITGROUP", hide_env_values = true)] + editgroup_id: String, + }, + Editgroup { + #[structopt(subcommand)] + cmd: EditgroupCommand, + }, + //Changelog + //Download + //History + Search { + + entity_type: EntityType, + + terms: Vec, + + #[structopt(long, short, default_value = "20")] + limit: i64, + + #[structopt(long = "--search-schema")] + search_schema: bool, + }, +} + +fn main() -> Result<()> { + let opt = Opt::from_args(); + + let log_level = match opt.verbose { + std::i8::MIN..=-1 => "none", + 0 => "error", + 1 => "warn", + 2 => "info", + 3 => "debug", + 4..=std::i8::MAX => "trace", + }; + // hyper logging is very verbose, so crank that down even if everything else is more verbose + let log_filter = format!("{},hyper=error", log_level); + env_logger::from_env(env_logger::Env::default().default_filter_or(log_filter)) + .format_timestamp(None) + .init(); + + debug!("Args parsed, starting up"); + + if let Err(err) = run(opt) { + // Be graceful about some errors + if let Some(io_err) = err.root_cause().downcast_ref::() { + match io_err.kind() { + std::io::ErrorKind::BrokenPipe => { + // presumably due to something like writing to stdout and piped to `head -n10` and + // stdout was closed + debug!("got BrokenPipe error, assuming stdout closed as expected and exiting with success"); + std::process::exit(0); + }, + _ => (), + } + } + let mut color_stderr = 
StandardStream::stderr( + if atty::is(atty::Stream::Stderr) { + ColorChoice::Auto + } else { + ColorChoice::Never + } + ); + color_stderr.set_color(ColorSpec::new().set_fg(Some(Color::Red)).set_bold(true))?; + eprintln!("Error: {:?}", err); + color_stderr.set_color(&ColorSpec::new())?; + std::process::exit(1); + } + Ok(()) +} + +fn run(opt: Opt) -> Result<()> { + let client = if opt.api_host.starts_with("https://") { + // Using Simple HTTPS + client::Client::try_new_https(&opt.api_host).context("Failed to create HTTPS client")? + } else if opt.api_host.starts_with("http://") { + // Using HTTP + client::Client::try_new_http(&opt.api_host).context("Failed to create HTTP client")? + } else { + Err(anyhow!("unsupported API Host prefix: {}", opt.api_host))? + }; + + let mut api_client = FatcatApiClient::new(&client, opt.api_host.clone(), opt.api_token.clone())?; + + match opt.cmd { + Command::Get {toml, specifier} => { + let result = specifier.get_from_api(&mut api_client)?; + if toml { + writeln!(&mut std::io::stdout(), "{}", result.to_toml_string()?)? + } else { + writeln!(&mut std::io::stdout(), "{}", result.to_json_string()?)? + } + }, + Command::Create { entity_type, input_path, editgroup_id } => { + let json_str = read_entity_file(input_path)?; + let ee = api_client.create_entity_from_json(entity_type, &json_str, editgroup_id)?; + println!("{}", serde_json::to_string(&ee)?); + }, + Command::Update { specifier, input_path, editgroup_id, mutations } => { + let (json_str, exact_specifier): (String, Specifier) = match (&input_path, mutations.len()) { + // input path or no mutations: read from path or stdin + (Some(_), _) | (None, 0) => { + (read_entity_file(input_path)?, specifier.into_entity_specifier(&mut api_client)?) 
+ }, + // no input path *and* mutations: fetch from API + (None, _) => { + let mut entity = specifier.get_from_api(&mut api_client)?; + entity.mutate(mutations)?; + (entity.to_json_string()?, entity.specifier()) + }, + }; + let ee = api_client.update_entity_from_json(exact_specifier, &json_str, editgroup_id)?; + println!("{}", serde_json::to_string(&ee)?); + }, + Command::Edit { specifier, editgroup_id, json, editing_command } => { + // TODO: fetch editgroup, check if this entity is already being updated in it. If so, + // need to fetch that revision, do the edit, parse that synatx is good, then delete the + // existing edit and update with the new one. + let original_entity = specifier.get_from_api(&mut api_client)?; + let exact_specifier = original_entity.specifier(); + let tmp_file = tempfile::Builder::new() + .suffix( if json { ".json" } else { ".toml"} ) + .tempfile()?; + if json { + writeln!(&tmp_file, "{}", original_entity.to_json_string()?)? + } else { + writeln!(&tmp_file, "{}", original_entity.to_toml_string()?)? 
+ } + let mut editor_cmd = std::process::Command::new(&editing_command) + .arg(tmp_file.path()) + .spawn() + .expect("failed to execute process"); + let cmd_status = editor_cmd.wait()?; + if !cmd_status.success() { + Err(anyhow!("editor ({}) exited with non-success status code ({}), bailing on edit", editing_command, cmd_status.code().map(|v| v.to_string()).unwrap_or("N/A".to_string())))?; + }; + let json_str = read_entity_file(Some(tmp_file.path().to_path_buf()))?; + // for whatever reason api_client's TCP connection is broken after spawning, so try a + // dummy call, expected to fail, but connection should re-establish after this + specifier.get_from_api(&mut api_client).context("re-fetch").ok(); + let ee = api_client.update_entity_from_json(exact_specifier, &json_str, editgroup_id).context("updating after edit")?; + println!("{}", serde_json::to_string(&ee)?); + }, + Command::Search { entity_type, terms, limit, search_schema } => { + let limit: Option = match limit { + l if l < 0 => None, + l => Some(l as u64), + }; + let results = fatcat_cli::crude_search(&opt.search_host, entity_type, limit, terms) + .context(format!("searching for {:?}", entity_type))?; + eprintln!("Got {} hits in {}ms", results.count, results.took_ms); + for hit in results { + let hit = hit?; + match (search_schema, entity_type) { + (true, _) => writeln!(&mut std::io::stdout(), "{}", hit.to_string())?, + (false, EntityType::Release) => { + let specifier = Specifier::Release(hit["ident"].as_str().unwrap().to_string()); + let entity = specifier.get_from_api(&mut api_client)?; + writeln!(&mut std::io::stdout(), "{}", entity.to_json_string()?)? 
+ }, + (false, _) => unimplemented!("searching other entity types"), + } + } + }, + Command::Delete { specifier, editgroup_id } => { + let result = api_client.delete_entity(specifier.clone(), editgroup_id) + .context(format!("delete entity: {:?}", specifier.clone()))?; + println!("{}", serde_json::to_string(&result)?); + }, + Command::Editgroup { cmd: EditgroupCommand::List { editor_id, limit, json } } => { + let editor_id = match editor_id.or(api_client.editor_id) { + Some(eid) => eid, + None => Err(anyhow!("require either working auth token or --editor-id"))?, + }; + let result = api_client.rt.block_on( + api_client.api.get_editor_editgroups(editor_id.clone(), Some(limit), None, None) + ).context("fetch editgroups")?; + match result { + fatcat_openapi::GetEditorEditgroupsResponse::Found(eg_list) => { + print_editgroups(eg_list, json)?; + }, + other => Err(anyhow!("{:?}", other)).context(format!("failed to fetch editgroups for editor_{}", editor_id))?, + } + }, + Command::Editgroup { cmd: EditgroupCommand::Reviewable { limit, json } } => { + let result = api_client.rt.block_on( + api_client.api.get_editgroups_reviewable(Some("editors".to_string()), Some(limit), None, None) + ).context("fetch reviewable editgroups")?; + match result { + fatcat_openapi::GetEditgroupsReviewableResponse::Found(eg_list) => { + print_editgroups(eg_list, json)?; + }, + other => Err(anyhow!("{:?}", other)).context(format!("failed to fetch reviewable editgroups"))?, + } + }, + Command::Editgroup { cmd: EditgroupCommand::Create { description }} => { + let mut eg = models::Editgroup::new(); + eg.description = Some(description); + eg.extra = Some({ + let mut extra = std::collections::HashMap::new(); + extra.insert("agent".to_string(), serde_json::Value::String("fatcat-cli".to_string())); + extra + }); + let result = api_client.rt.block_on( + api_client.api.create_editgroup(eg))?; + match result { + fatcat_openapi::CreateEditgroupResponse::SuccessfullyCreated(eg) => + println!("{}", 
serde_json::to_string(&eg)?), + other => Err(anyhow!("{:?}", other)).context(format!("failed to create editgroup"))?, + } + }, + Command::Editgroup { cmd: EditgroupCommand::Accept { editgroup_id } } => { + let result = api_client.rt.block_on( + api_client.api.accept_editgroup(editgroup_id.clone()) + ).context("accept editgroup")?; + match result { + fatcat_openapi::AcceptEditgroupResponse::MergedSuccessfully(msg) => + println!("{}", serde_json::to_string(&msg)?), + other => Err(anyhow!("failed to accept editgroup {}: {:?}", editgroup_id, other))?, + } + }, + Command::Editgroup { cmd: EditgroupCommand::Submit{ editgroup_id } } => { + let eg = api_client.update_editgroup_submit(editgroup_id, true)?; + println!("{}", eg.to_json_string()?); + }, + Command::Editgroup { cmd: EditgroupCommand::Unsubmit { editgroup_id } } => { + let eg = api_client.update_editgroup_submit(editgroup_id, false)?; + println!("{}", eg.to_json_string()?); + }, + Command::Status { json } => { + let status = api_client.status()?; + if json { + println!("{}", serde_json::to_string(&status)?) 
+ } else { + status.pretty_print()?; + } + }, + } + Ok(()) +} diff --git a/rust/fatcat-cli/src/mutation.rs b/rust/fatcat-cli/src/mutation.rs new file mode 100644 index 0000000..e69de29 diff --git a/rust/fatcat-cli/src/search.rs b/rust/fatcat-cli/src/search.rs new file mode 100644 index 0000000..ec5d1d6 --- /dev/null +++ b/rust/fatcat-cli/src/search.rs @@ -0,0 +1,172 @@ + +use serde_json::json; +use std::time::Duration; +use anyhow::{Result, Context, anyhow}; +use log::{self,info,debug}; +use crate::EntityType; + + +pub struct SearchResults { + pub entity_type: EntityType, + pub limit: Option, + pub count: u64, + pub took_ms: u64, + offset: u64, + batch: Vec, + scroll_id: Option, + scroll_url: String, + http_client: reqwest::Client, +} + +impl Iterator for SearchResults { + type Item = Result; + + fn next(&mut self) -> Option> { + // if we already hit limit, bail early + if let Some(l) = self.limit { + if self.offset >= l { + return None + } + } + // if current batch is empty, and we are scrolling, refill the current batch + if self.batch.len() == 0 && self.scroll_id.is_some() { + let response = self.http_client.get(&self.scroll_url) + .header("Content-Type", "application/json") + .body(json!({ + "scroll": "2m", + "scroll_id": self.scroll_id.clone().unwrap(), + }).to_string()) + .send(); + let mut response = match response { + Err(e) => return Some(Err(e.into())), + Ok(v) => v, + }; + if !response.status().is_success() { + return Some(Err(anyhow!("search error, status={}", response.status()))); + }; + let body: serde_json::Value = match response.json() { + Err(e) => return Some(Err(e.into())), + Ok(v) => v, + }; + self.scroll_id = Some(body["_scroll_id"].as_str().unwrap().to_string()); + self.batch = body["hits"]["hits"].as_array().unwrap().to_vec(); + } + + // return next hit from the most recent batch + if self.batch.len() > 0 { + self.offset += 1; + let val = self.batch.pop().unwrap(); + let source = val["_source"].clone(); + return Some(Ok(source)) + } + + // if 
batch is empty and couldn't be refilled, terminate + // TODO: should we raise error if ended early? + return None + } +} + +pub fn crude_search(api_host: &str, entity_type: EntityType, limit: Option, terms: Vec) -> Result { + + let index = match entity_type { + EntityType::Release => "fatcat_release", + EntityType::File => "fatcat_file", + EntityType::Container => "fatcat_container", + _ => Err(anyhow!("No search index for entity type: {:?}", entity_type))?, + }; + let http_client = reqwest::Client::builder() + .timeout(Duration::from_secs(10)) + .danger_accept_invalid_certs(true) + .build() + .expect("ERROR :: Could not build reqwest client"); + + let query: String = if terms.len() == 0 { + "*".to_string() + } else { + terms.join(" ") + }; + info!("Search query string: {}", query); + let request_url = format!("{}/{}/_search", api_host, index); + let scroll_url = format!("{}/_search/scroll", api_host); + + // sort by _doc for (potentially) very large result sets + let (scroll_mode, sort_mode, size) = match limit { + None => (true, "_doc", 100), + Some(l) if l > 100 => (true, "_doc", 100), + Some(l) => (false, "_score", l), + + }; + + let query_body = json!({ + "query": { + "boosting": { + "positive": { + "bool": { + "must": { + "query_string": { + "query": query, + "default_operator": "AND", + "analyze_wildcard": true, + "allow_leading_wildcard": false, + "lenient": true, + "fields": [ + "title^2", + "biblio", + ], + }, + }, + "should": { + "term": { "in_ia": true }, + }, + }, + }, + "negative": { + "bool": { + "should": [ + {"bool": { "must_not" : { "exists": { "field": "title" }}}}, + {"bool": { "must_not" : { "exists": { "field": "year" }}}}, + {"bool": { "must_not" : { "exists": { "field": "type" }}}}, + {"bool": { "must_not" : { "exists": { "field": "stage" }}}}, + ], + }, + }, + "negative_boost": 0.5, + }, + }, + "size": size, + "sort": [ sort_mode ], + }).to_string(); + + let mut request = http_client.get(&request_url) + .header("Content-Type", 
"application/json") + .body(query_body); + + if scroll_mode { + request = request.query(&[("scroll", "2m")]); + } + + let mut response = request.send()?; + + if !response.status().is_success() { + Err(anyhow!("search error, status={}", response.status()))?; + } + //println!("{:?}", response); + let body: serde_json::Value = response.json()?; + + let scroll_id = match scroll_mode { + false => None, + true => Some(body["_scroll_id"].as_str().unwrap().to_string()), + }; + + Ok(SearchResults { + entity_type, + limit, + count: body["hits"]["total"].as_u64().unwrap(), + took_ms: body["took"].as_u64().unwrap(), + offset: 0, + batch: body["hits"]["hits"].as_array().unwrap().to_vec(), + scroll_id, + scroll_url, + http_client, + }) +} diff --git a/rust/fatcat-cli/src/specifier.rs b/rust/fatcat-cli/src/specifier.rs new file mode 100644 index 0000000..f8b4420 --- /dev/null +++ b/rust/fatcat-cli/src/specifier.rs @@ -0,0 +1,282 @@ + +use fatcat_openapi::ApiNoContext; +use anyhow::{Result, anyhow, Context}; +use std::str::FromStr; +use lazy_static::lazy_static; +use regex::Regex; +use crate::{ApiEntityModel, FatcatApiClient}; + + +#[derive(Debug, PartialEq, Clone)] +pub enum ReleaseLookupKey { + DOI, + PMCID, + PMID, + Arxiv, + // TODO: the others +} + +#[derive(Debug, PartialEq, Clone)] +pub enum ContainerLookupKey { + ISSNL, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum CreatorLookupKey { + Orcid, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum FileLookupKey { + SHA1, + SHA256, + MD5, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum Specifier { + Release(String), + ReleaseLookup(ReleaseLookupKey, String), + Work(String), + Container(String), + ContainerLookup(ContainerLookupKey, String), + Creator(String), + CreatorLookup(CreatorLookupKey, String), + File(String), + FileLookup(FileLookupKey, String), + FileSet(String), + WebCapture(String), + Editgroup(String), + Editor(String), + EditorUsername(String), + Changelog(i64), +} + +impl Specifier { + + /// If this 
Specifier is a lookup, call the API to do the lookup and return the resulting + /// specific entity specifier (eg, with an FCID). If already specific, just pass through. + pub fn into_entity_specifier(self, api_client: &mut FatcatApiClient) -> Result { + use Specifier::*; + match self { + Release(_) | Work(_) | Creator(_) | Container(_) | File(_) | FileSet(_) | WebCapture(_) | Editgroup(_) | Editor(_) | Changelog(_) => Ok(self), + ReleaseLookup(_, _) => Ok(self.get_from_api(api_client)?.specifier()), + ContainerLookup(_, _) => Ok(self.get_from_api(api_client)?.specifier()), + CreatorLookup(_, _) => Ok(self.get_from_api(api_client)?.specifier()), + FileLookup(_, _) => Ok(self.get_from_api(api_client)?.specifier()), + EditorUsername(_username) => { + Err(anyhow!("editor lookup by username isn't implemented in fatcat-server API yet, sorry")) + }, + } + } + + pub fn get_from_api(&self, api_client: &mut FatcatApiClient) -> Result> { + use Specifier::*; + let ret: Result> = match self { + Release(fcid) => + match api_client.rt.block_on(api_client.api.get_release(fcid.to_string(), None, None))? 
{ + fatcat_openapi::GetReleaseResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetReleaseResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetReleaseResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + ReleaseLookup(ext_id, key) => { + use ReleaseLookupKey::*; + let (doi, pmcid, pmid, arxiv) = ( + if let DOI = ext_id { Some(key.to_string()) } else { None }, + if let PMCID = ext_id { Some(key.to_string()) } else { None }, + if let PMID = ext_id { Some(key.to_string()) } else { None }, + if let Arxiv = ext_id { Some(key.to_string()) } else { None }, + ); + // doi, wikidata, isbn13, pmid, pmcid, core, arxiv, jstor, ark, mag + let result = api_client.rt.block_on( + api_client.api.lookup_release(doi, None, None, pmid, pmcid, None, arxiv, None, None, None, None, None))?; + match result { + fatcat_openapi::LookupReleaseResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::LookupReleaseResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::LookupReleaseResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + } + }, + Work(fcid) => + match api_client.rt.block_on(api_client.api.get_work(fcid.to_string(), None, None))? { + fatcat_openapi::GetWorkResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetWorkResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetWorkResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + Container(fcid) => + match api_client.rt.block_on(api_client.api.get_container(fcid.to_string(), None, None))? 
{ + fatcat_openapi::GetContainerResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetContainerResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetContainerResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + ContainerLookup(ext_id, key) => { + let result = api_client.rt.block_on(match ext_id { + ContainerLookupKey::ISSNL => api_client.api.lookup_container(Some(key.to_string()), None, None, None), + })?; + match result { + fatcat_openapi::LookupContainerResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::LookupContainerResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::LookupContainerResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + } + }, + Creator(fcid) => + match api_client.rt.block_on(api_client.api.get_creator(fcid.to_string(), None, None))? 
{ + fatcat_openapi::GetCreatorResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetCreatorResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetCreatorResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + CreatorLookup(ext_id, key) => { + let result = api_client.rt.block_on(match ext_id { + CreatorLookupKey::Orcid => api_client.api.lookup_creator(Some(key.to_string()), None, None, None), + })?; + match result { + fatcat_openapi::LookupCreatorResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::LookupCreatorResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::LookupCreatorResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + } + }, + File(fcid) => + match api_client.rt.block_on(api_client.api.get_file(fcid.to_string(), None, None))? 
{ + fatcat_openapi::GetFileResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetFileResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetFileResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + FileLookup(hash, key) => { + use FileLookupKey::*; + let (sha1, sha256, md5) = ( + if let SHA1 = hash { Some(key.to_string()) } else { None }, + if let SHA256 = hash { Some(key.to_string()) } else { None }, + if let MD5 = hash { Some(key.to_string()) } else { None }, + ); + let result = api_client.rt.block_on( + api_client.api.lookup_file(sha1, sha256, md5, None, None), + )?; + match result { + fatcat_openapi::LookupFileResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::LookupFileResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::LookupFileResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + } + }, + FileSet(fcid) => + match api_client.rt.block_on(api_client.api.get_fileset(fcid.to_string(), None, None))? { + fatcat_openapi::GetFilesetResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetFilesetResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetFilesetResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + WebCapture(fcid) => + match api_client.rt.block_on(api_client.api.get_webcapture(fcid.to_string(), None, None))? 
{ + fatcat_openapi::GetWebcaptureResponse::FoundEntity(model) => Ok(Box::new(model)), + fatcat_openapi::GetWebcaptureResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetWebcaptureResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + Editgroup(fcid) => + match api_client.rt.block_on(api_client.api.get_editgroup(fcid.to_string()))? { + fatcat_openapi::GetEditgroupResponse::Found(model) => Ok(Box::new(model)), + fatcat_openapi::GetEditgroupResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetEditgroupResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + Editor(fcid) => + match api_client.rt.block_on(api_client.api.get_editor(fcid.to_string()))? { + fatcat_openapi::GetEditorResponse::Found(model) => Ok(Box::new(model)), + fatcat_openapi::GetEditorResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetEditorResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + Changelog(index) => + match api_client.rt.block_on(api_client.api.get_changelog_entry(*index))? 
{ + fatcat_openapi::GetChangelogEntryResponse::FoundChangelogEntry(model) => Ok(Box::new(model)), + fatcat_openapi::GetChangelogEntryResponse::BadRequest(err) => Err(anyhow!("Bad Request ({}): {}", err.error, err.message)), + fatcat_openapi::GetChangelogEntryResponse::NotFound(err) => Err(anyhow!("Not Found: {}", err.message)), + resp => Err(anyhow!("{:?}", resp)).context(format!("API GET failed: {:?}", self))?, + }, + EditorUsername(_username) => { + unimplemented!("editor lookup by username isn't implemented in fatcat-server API yet, sorry") + }, + }; + match ret { + Ok(_) => ret, + Err(_) => ret.with_context(|| format!("Failed to GET {:?}", self)), + } + } +} + +impl FromStr for Specifier { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + // first try simple entity prefixes + lazy_static! { + static ref SPEC_ENTITY_RE: Regex = Regex::new(r"^(release|work|creator|container|file|fileset|webcapture|editgroup|editor)_([2-7a-z]{26})$").unwrap(); + } + if let Some(caps) = SPEC_ENTITY_RE.captures(s) { + return match (&caps[1], &caps[2]) { + ("release", fcid) => Ok(Specifier::Release(fcid.to_string())), + ("work", fcid) => Ok(Specifier::Work(fcid.to_string())), + ("container", fcid) => Ok(Specifier::Container(fcid.to_string())), + ("creator", fcid) => Ok(Specifier::Creator(fcid.to_string())), + ("file", fcid) => Ok(Specifier::File(fcid.to_string())), + ("fileset", fcid) => Ok(Specifier::FileSet(fcid.to_string())), + ("webcapture", fcid) => Ok(Specifier::WebCapture(fcid.to_string())), + ("editgroup", fcid) => Ok(Specifier::Editgroup(fcid.to_string())), + ("editor", fcid) => Ok(Specifier::Editor(fcid.to_string())), + _ => Err(anyhow!("unexpected fatcat FCID type: {}", &caps[1]))?, + }; + } + + // then try lookup prefixes + lazy_static! 
{ + static ref SPEC_LOOKUP_RE: Regex = Regex::new(r"^(doi|pmcid|pmid|arxiv|issnl|orcid|sha1|sha256|md5|username|changelog):(\S+)$").unwrap(); + } + if let Some(caps) = SPEC_LOOKUP_RE.captures(s) { + return match (&caps[1], &caps[2]) { + ("doi", key) => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::DOI, key.to_string())), + ("pmcid", key) => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::PMCID, key.to_string())), + ("pmid", key) => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::PMID, key.to_string())), + ("arxiv", key) => Ok(Specifier::ReleaseLookup(ReleaseLookupKey::Arxiv, key.to_string())), + ("issnl", key) => Ok(Specifier::ContainerLookup(ContainerLookupKey::ISSNL, key.to_string())), + ("orcid", key) => Ok(Specifier::CreatorLookup(CreatorLookupKey::Orcid, key.to_string())), + ("sha1", key) => Ok(Specifier::FileLookup(FileLookupKey::SHA1, key.to_string())), + ("sha256", key) => Ok(Specifier::FileLookup(FileLookupKey::SHA256, key.to_string())), + ("md5", key) => Ok(Specifier::FileLookup(FileLookupKey::MD5, key.to_string())), + ("username", key) => Ok(Specifier::EditorUsername(key.to_string())), + _ => Err(anyhow!("unexpected entity lookup type: {}", &caps[1]))?, + }; + } + // lastly, changelog entity lookup + lazy_static! 
{ + static ref SPEC_CHANGELOG_RE: Regex = Regex::new(r"^changelog_(\d+)$").unwrap(); + }; + if let Some(caps) = SPEC_CHANGELOG_RE.captures(s) { + return Ok(Specifier::Changelog(caps[1].parse::()?)); + } + return Err(anyhow!("expecting a specifier: entity identifier or key/value lookup: {}", s)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_specifier_from_str() -> () { + assert!(Specifier::from_str("release_asdf").is_err()); + assert_eq!(Specifier::from_str("creator_iimvc523xbhqlav6j3sbthuehu").unwrap(), Specifier::Creator("iimvc523xbhqlav6j3sbthuehu".to_string())); + assert_eq!(Specifier::from_str("username:big-bot").unwrap(), Specifier::EditorUsername("big-bot".to_string())); + assert_eq!(Specifier::from_str("doi:10.1234/a!s.df+-d").unwrap(), Specifier::ReleaseLookup(ReleaseLookupKey::DOI, "10.1234/a!s.df+-d".to_string())); + assert!(Specifier::from_str("doi:").is_err()); + assert_eq!(Specifier::from_str("changelog_1234").unwrap(), Specifier::Changelog(1234)); + assert!(Specifier::from_str("changelog_12E4").is_err()); + } + +} -- cgit v1.2.3