diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-04-09 19:33:08 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-04-09 19:33:08 -0700 |
commit | 61e52b98a14ea493ea9fd3e7be84ca4e4d04b993 (patch) | |
tree | c9afc8fa46f9d8376ea41a8965850e05a617860a | |
parent | 3aa7276e7ef5b9e8276328b24dff3b9aef7e7367 (diff) | |
download | fatcat-cli-61e52b98a14ea493ea9fd3e7be84ca4e4d04b993.tar.gz fatcat-cli-61e52b98a14ea493ea9fd3e7be84ca4e4d04b993.zip |
simple preference for IA URLs (if available)
-rw-r--r-- | fatcat-cli/src/download.rs | 18 |
1 files changed, 15 insertions, 3 deletions
```diff
diff --git a/fatcat-cli/src/download.rs b/fatcat-cli/src/download.rs
index a0f0eeb..53c5a41 100644
--- a/fatcat-cli/src/download.rs
+++ b/fatcat-cli/src/download.rs
@@ -148,13 +148,25 @@ pub fn download_file(
 
     let download_path = final_path.with_extension("partial_download");
 
-    // TODO: only archive.org URLs (?)
     let raw_url = match fe.urls.as_ref() {
         None => return Ok(DownloadStatus::NoPublicFile),
         Some(url_list) if url_list.is_empty() => return Ok(DownloadStatus::NoPublicFile),
         // TODO: remove clone (?)
-        // TODO: better heuristic than "just try first URL"
-        Some(url_list) => url_list[0].url.clone(),
+        Some(url_list) => {
+            // prefer an IA_hosted URL, but fallback to any URL
+            let public_url_list: Vec<&fatcat_openapi::models::FileUrl> = url_list.iter().filter(|v| {
+                if let Ok(url) = Url::parse(&v.url) {
+                    url.host_str() == Some("web.archive.org") || url.host_str() == Some("archive.org")
+                } else {
+                    false
+                }
+            }).collect();
+            if !public_url_list.is_empty() {
+                public_url_list[0].url.clone()
+            } else {
+                url_list[0].url.clone()
+            }
+        },
     };
 
     let mut url = Url::parse(&raw_url)?;
```