diff options
-rw-r--r-- | fatcat-cli/src/download.rs | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/fatcat-cli/src/download.rs b/fatcat-cli/src/download.rs index a0f0eeb..53c5a41 100644 --- a/fatcat-cli/src/download.rs +++ b/fatcat-cli/src/download.rs @@ -148,13 +148,25 @@ pub fn download_file( let download_path = final_path.with_extension("partial_download"); - // TODO: only archive.org URLs (?) let raw_url = match fe.urls.as_ref() { None => return Ok(DownloadStatus::NoPublicFile), Some(url_list) if url_list.is_empty() => return Ok(DownloadStatus::NoPublicFile), // TODO: remove clone (?) - // TODO: better heuristic than "just try first URL" - Some(url_list) => url_list[0].url.clone(), + Some(url_list) => { + // prefer an IA_hosted URL, but fallback to any URL + let public_url_list: Vec<&fatcat_openapi::models::FileUrl> = url_list.iter().filter(|v| { + if let Ok(url) = Url::parse(&v.url) { + url.host_str() == Some("web.archive.org") || url.host_str() == Some("archive.org") + } else { + false + } + }).collect(); + if !public_url_list.is_empty() { + public_url_list[0].url.clone() + } else { + url_list[0].url.clone() + } + }, }; let mut url = Url::parse(&raw_url)?; |