diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-02-10 14:40:38 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-02-10 14:40:38 -0800 |
commit | b85353d4b6c8ff6fd71c31b3bd618c76afcd6a39 (patch) | |
tree | 79b920bb7a197a9a7fa2d6be446c05da4349c6ae | |
parent | 32388ecb860949fe72941cd50a65964dc9c18e1e (diff) | |
download | fatcat-cli-b85353d4b6c8ff6fd71c31b3bd618c76afcd6a39.tar.gz fatcat-cli-b85353d4b6c8ff6fd71c31b3bd618c76afcd6a39.zip |
download: filenames as specifiers, not sha1
-rw-r--r-- | fatcat-cli/src/download.rs | 18 | ||||
-rw-r--r-- | fatcat-cli/src/main.rs | 2 | ||||
-rw-r--r-- | fatcat-cli/src/specifier.rs | 66 |
3 files changed, 74 insertions, 12 deletions
diff --git a/fatcat-cli/src/download.rs b/fatcat-cli/src/download.rs
index 6a420b0..cf520fe 100644
--- a/fatcat-cli/src/download.rs
+++ b/fatcat-cli/src/download.rs
@@ -8,6 +8,7 @@ use std::fs::File;
 use std::io::{self, BufRead};
 use std::path::PathBuf;
 use url::Url;
+use crate::{ApiModelIdent, Specifier};
 
 #[derive(Debug, PartialEq, Clone)]
 pub enum DownloadStatus {
@@ -64,9 +65,8 @@ fn rewrite_wayback_url(url: Url) -> Result<Url> {
     Ok(url)
 }
 
-fn default_filename(fe: &FileEntity) -> Result<PathBuf> {
+fn default_filename(specifier: &Specifier, fe: &FileEntity) -> Result<PathBuf> {
 
-    let sha1hex = &fe.sha1.clone().unwrap();
     let file_suffix = match fe.mimetype.as_ref().map(String::as_str) {
         Some("application/pdf") => ".pdf",
         Some("application/postscript") => ".ps",
@@ -78,12 +78,12 @@ fn default_filename(fe: &FileEntity) -> Result<PathBuf> {
         _ => "",
     };
 
-    let path_string = format!("{}{}", sha1hex, file_suffix);
+    let path_string = format!("{}{}", specifier, file_suffix);
     Ok(PathBuf::from(&path_string))
 }
 
 /// Attempts to download a file entity, including verifying checksum.
-pub fn download_file(fe: &FileEntity, output_path: Option<PathBuf>) -> Result<DownloadStatus> {
+pub fn download_file(fe: &FileEntity, specifier: &Specifier, output_path: Option<PathBuf>) -> Result<DownloadStatus> {
     match &fe.sha1 {
         Some(v) => v,
         None => return Ok(DownloadStatus::FileMissingMetadata),
@@ -96,11 +96,11 @@ pub fn download_file(fe: &FileEntity, output_path: Option<PathBuf>) -> Result<Do
     let final_path = match output_path {
         Some(ref path) if path.is_dir() => {
             let mut full = output_path.unwrap_or(PathBuf::new());
-            full.push(default_filename(fe)?);
+            full.push(default_filename(specifier, fe)?);
             full
         }
         Some(path) => path,
-        None => default_filename(fe)?,
+        None => default_filename(specifier, fe)?,
     };
 
     // NOTE: this isn't perfect; there could have been a race condition
@@ -193,8 +193,9 @@ pub fn download_release(re: &ReleaseEntity, output_path: Option<PathBuf>) -> Res
         Some(list) => list,
     };
     let mut status = DownloadStatus::NoPublicFile;
+    let specifier = re.specifier();
     for fe in file_entities {
-        status = download_file(&fe, output_path.clone())?;
+        status = download_file(&fe, &specifier, output_path.clone())?;
         match status {
             DownloadStatus::Exists(_) | DownloadStatus::Downloaded(_) => break,
             _ => (),
@@ -223,7 +224,8 @@ fn download_entity(json_str: String, output_path: Option<PathBuf>) -> Result<Dow
     match file_attempt {
         Ok(fe) => {
             if fe.ident.is_some() && fe.urls.is_some() {
-                let status = download_file(&fe, output_path)?;
+                let specifier = fe.specifier();
+                let status = download_file(&fe, &specifier, output_path)?;
                 println!(
                     "file_{}\t{}\t{}",
                     fe.ident.unwrap(),
diff --git a/fatcat-cli/src/main.rs b/fatcat-cli/src/main.rs
index 7294735..ced35b4 100644
--- a/fatcat-cli/src/main.rs
+++ b/fatcat-cli/src/main.rs
@@ -528,7 +528,7 @@ fn run(opt: Opt) -> Result<()> {
                         resp => Err(anyhow!("{:?}", resp))
                             .with_context(|| format!("API GET failed: {:?}", ident)),
                     }?;
-                    download_file(&file_entity, output_path)
+                    download_file(&file_entity, &file_entity.specifier(), output_path)
                 }
                 other => Err(anyhow!("Don't know how to download: {:?}", other)),
             }?;
diff --git a/fatcat-cli/src/specifier.rs b/fatcat-cli/src/specifier.rs
index 0d8d209..65d095c 100644
--- a/fatcat-cli/src/specifier.rs
+++ b/fatcat-cli/src/specifier.rs
@@ -3,6 +3,7 @@ use anyhow::{anyhow, Context, Result};
 use lazy_static::lazy_static;
 use regex::Regex;
 use std::str::FromStr;
+use std::fmt;
 
 #[derive(Debug, PartialEq, Clone)]
 pub enum ReleaseLookupKey {
@@ -13,16 +14,43 @@ pub enum ReleaseLookupKey {
     // TODO: the others
 }
 
+impl fmt::Display for ReleaseLookupKey {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::DOI => write!(f, "doi"),
+            Self::PMCID => write!(f, "pmcid"),
+            Self::PMID => write!(f, "pmid"),
+            Self::Arxiv => write!(f, "arxiv"),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Clone)]
 pub enum ContainerLookupKey {
     ISSNL,
 }
 
+impl fmt::Display for ContainerLookupKey {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::ISSNL => write!(f, "issnl"),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Clone)]
 pub enum CreatorLookupKey {
     Orcid,
 }
 
+impl fmt::Display for CreatorLookupKey {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Orcid => write!(f, "orcid"),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Clone)]
 pub enum FileLookupKey {
     SHA1,
@@ -30,6 +58,16 @@ pub enum FileLookupKey {
     MD5,
 }
 
+impl fmt::Display for FileLookupKey {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::SHA1 => write!(f, "sha1"),
+            Self::SHA256 => write!(f, "sha256"),
+            Self::MD5 => write!(f, "md5"),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Clone)]
 pub enum Specifier {
     Release(String),
@@ -475,6 +513,28 @@ impl Specifier {
     }
 }
 
+impl fmt::Display for Specifier {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self { // ident variants print as "<entity type>_<ident>", lookups as "<key>:<value>"
+            Self::Release(fcid) => write!(f, "release_{}", fcid),
+            Self::ReleaseLookup(prefix, val) => write!(f, "{}:{}", prefix, val),
+            Self::Work(fcid) => write!(f, "work_{}", fcid),
+            Self::Container(fcid) => write!(f, "container_{}", fcid),
+            Self::ContainerLookup(prefix, val) => write!(f, "{}:{}", prefix, val),
+            Self::Creator(fcid) => write!(f, "creator_{}", fcid),
+            Self::CreatorLookup(prefix, val) => write!(f, "{}:{}", prefix, val),
+            Self::File(fcid) => write!(f, "file_{}", fcid),
+            Self::FileLookup(prefix, val) => write!(f, "{}:{}", prefix, val),
+            Self::FileSet(fcid) => write!(f, "fileset_{}", fcid),
+            Self::WebCapture(fcid) => write!(f, "webcapture_{}", fcid),
+            Self::Editgroup(fcid) => write!(f, "editgroup_{}", fcid),
+            Self::Editor(fcid) => write!(f, "editor_{}", fcid),
+            Self::EditorUsername(username) => write!(f, "user:{}", username),
+            Self::Changelog(index) => write!(f, "changelog_{}", index),
+        }
+    }
+}
+
 impl FromStr for Specifier {
     type Err = anyhow::Error;
 
@@ -501,7 +561,7 @@ impl FromStr for Specifier {
         // then try lookup prefixes
         lazy_static! {
             static ref SPEC_LOOKUP_RE: Regex = Regex::new(
-                r"^(doi|pmcid|pmid|arxiv|issnl|orcid|sha1|sha256|md5|username|changelog):(\S+)$"
+                r"^(doi|pmcid|pmid|arxiv|issnl|orcid|sha1|sha256|md5|user):(\S+)$"
             )
             .unwrap();
         }
@@ -537,7 +597,7 @@ impl FromStr for Specifier {
                     key.to_string(),
                 )),
                 ("md5", key) => Ok(Specifier::FileLookup(FileLookupKey::MD5, key.to_string())),
-                ("username", key) => Ok(Specifier::EditorUsername(key.to_string())),
+                ("user", key) => Ok(Specifier::EditorUsername(key.to_string())),
                 _ => Err(anyhow!("unexpected entity lookup type: {}", &caps[1])),
             };
         }
@@ -567,7 +627,7 @@ mod tests {
             Specifier::Creator("iimvc523xbhqlav6j3sbthuehu".to_string())
         );
         assert_eq!(
-            Specifier::from_str("username:big-bot").unwrap(),
+            Specifier::from_str("user:big-bot").unwrap(),
             Specifier::EditorUsername("big-bot".to_string())
         );
         assert_eq!(