about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-04-09 19:33:08 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-04-09 19:33:08 -0700
commit 61e52b98a14ea493ea9fd3e7be84ca4e4d04b993 (patch)
tree c9afc8fa46f9d8376ea41a8965850e05a617860a
parent 3aa7276e7ef5b9e8276328b24dff3b9aef7e7367 (diff)
download fatcat-cli-61e52b98a14ea493ea9fd3e7be84ca4e4d04b993.tar.gz
fatcat-cli-61e52b98a14ea493ea9fd3e7be84ca4e4d04b993.zip
simple preference for IA URLs (if available)
-rw-r--r-- fatcat-cli/src/download.rs 18
1 files changed, 15 insertions, 3 deletions
diff --git a/fatcat-cli/src/download.rs b/fatcat-cli/src/download.rs
index a0f0eeb..53c5a41 100644
--- a/fatcat-cli/src/download.rs
+++ b/fatcat-cli/src/download.rs
@@ -148,13 +148,25 @@ pub fn download_file(
let download_path = final_path.with_extension("partial_download");
- // TODO: only archive.org URLs (?)
let raw_url = match fe.urls.as_ref() {
None => return Ok(DownloadStatus::NoPublicFile),
Some(url_list) if url_list.is_empty() => return Ok(DownloadStatus::NoPublicFile),
// TODO: remove clone (?)
- // TODO: better heuristic than "just try first URL"
- Some(url_list) => url_list[0].url.clone(),
+ Some(url_list) => {
+ // prefer an IA_hosted URL, but fallback to any URL
+ let public_url_list: Vec<&fatcat_openapi::models::FileUrl> = url_list.iter().filter(|v| {
+ if let Ok(url) = Url::parse(&v.url) {
+ url.host_str() == Some("web.archive.org") || url.host_str() == Some("archive.org")
+ } else {
+ false
+ }
+ }).collect();
+ if !public_url_list.is_empty() {
+ public_url_list[0].url.clone()
+ } else {
+ url_list[0].url.clone()
+ }
+ },
};
let mut url = Url::parse(&raw_url)?;