diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-12-21 17:09:10 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-12-21 17:09:10 -0800 |
commit | c1c01caf6b40343bd876e10961829e2b15a9c37c (patch) | |
tree | cb26627fe194cbd878ad9c5912dce2feef2c1c8d /rust/src | |
parent | 86f37a5df6f94d7736be736d0a440ae65425d6c9 (diff) | |
download | fatcat-c1c01caf6b40343bd876e10961829e2b15a9c37c.tar.gz fatcat-c1c01caf6b40343bd876e10961829e2b15a9c37c.zip |
verify checksum against regexes
Diffstat (limited to 'rust/src')
-rw-r--r-- | rust/src/api_entity_crud.rs | 13 | ||||
-rw-r--r-- | rust/src/api_helpers.rs | 73 | ||||
-rw-r--r-- | rust/src/api_server.rs | 45 | ||||
-rw-r--r-- | rust/src/api_wrappers.rs | 13 | ||||
-rw-r--r-- | rust/src/lib.rs | 6 |
5 files changed, 130 insertions, 20 deletions
diff --git a/rust/src/api_entity_crud.rs b/rust/src/api_entity_crud.rs index 2f28e858..792e6f9a 100644 --- a/rust/src/api_entity_crud.rs +++ b/rust/src/api_entity_crud.rs @@ -885,6 +885,19 @@ impl EntityCrud for FileEntity { } fn db_insert_revs(conn: &DbConn, models: &[&Self]) -> Result<Vec<Uuid>> { + // first verify hash syntax + for entity in models { + if let Some(ref hash) = entity.md5 { + check_md5(hash)?; + } + if let Some(ref hash) = entity.sha1 { + check_sha1(hash)?; + } + if let Some(ref hash) = entity.sha256 { + check_sha256(hash)?; + } + } + let rev_ids: Vec<Uuid> = insert_into(file_rev::table) .values( models diff --git a/rust/src/api_helpers.rs b/rust/src/api_helpers.rs index b6525546..32750836 100644 --- a/rust/src/api_helpers.rs +++ b/rust/src/api_helpers.rs @@ -391,6 +391,78 @@ fn test_check_orcid() { assert!(check_orcid("0x23-4567-3456-6780").is_err()); } +pub fn check_md5(raw: &str) -> Result<()> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[a-f0-9]{32}$").unwrap(); + } + if RE.is_match(raw) { + Ok(()) + } else { + Err(ErrorKind::MalformedChecksum(format!( + "not a valid MD5: '{}' (expected lower-case hex, eg, '1b39813549077b2347c0f370c3864b40')", + raw + )) + .into()) + } +} + +#[test] +fn test_check_md5() { + assert!(check_md5("1b39813549077b2347c0f370c3864b40").is_ok()); + assert!(check_md5("1g39813549077b2347c0f370c3864b40").is_err()); + assert!(check_md5("1B39813549077B2347C0F370c3864b40").is_err()); + assert!(check_md5("1b39813549077b2347c0f370c3864b4").is_err()); + assert!(check_md5("1b39813549077b2347c0f370c3864b411").is_err()); +} + +pub fn check_sha1(raw: &str) -> Result<()> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[a-f0-9]{40}$").unwrap(); + } + if RE.is_match(raw) { + Ok(()) + } else { + Err(ErrorKind::MalformedChecksum(format!( + "not a valid SHA-1: '{}' (expected lower-case hex, eg, 'e9dd75237c94b209dc3ccd52722de6931a310ba3')", + raw + )) + .into()) + } +} + +#[test] +fn test_check_sha1() { + assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba3").is_ok()); + assert!(check_sha1("g9dd75237c94b209dc3ccd52722de6931a310ba3").is_err()); + assert!(check_sha1("e9DD75237C94B209DC3CCD52722de6931a310ba3").is_err()); + assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba").is_err()); + assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba33").is_err()); +} + +pub fn check_sha256(raw: &str) -> Result<()> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[a-f0-9]{64}$").unwrap(); + } + if RE.is_match(raw) { + Ok(()) + } else { + Err(ErrorKind::MalformedChecksum(format!( + "not a valid SHA-256: '{}' (expected lower-case hex, eg, 'cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452')", + raw + )) + .into()) + } +} + +#[test] +fn test_check_sha256() { + assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok()); + assert!(check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()); + assert!(check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()); + assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err()); + assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err()); +} + pub fn check_release_type(raw: &str) -> Result<()> { let valid_types = vec![ // Citation Style Language official types @@ -499,4 +571,3 @@ fn test_check_contrib_role() { // TODO: make the above checks "more correct" // TODO: check ISBN-13 -// TODO: check hashes (SHA-1, etc) diff --git a/rust/src/api_server.rs b/rust/src/api_server.rs index d03fce07..adc6ab11 100644 --- a/rust/src/api_server.rs +++ b/rust/src/api_server.rs @@ -178,24 +178,33 @@ impl Server { conn: &DbConn, ) -> Result<FileEntity> { let (ident, rev): (FileIdentRow, FileRevRow) = match (md5, sha1, sha256) { - (Some(md5), None, None) => file_ident::table - .inner_join(file_rev::table) - .filter(file_rev::md5.eq(md5)) - .filter(file_ident::is_live.eq(true)) - .filter(file_ident::redirect_id.is_null()) - .first(conn)?, - (None, Some(sha1), None) => file_ident::table - .inner_join(file_rev::table) - .filter(file_rev::sha1.eq(sha1)) - .filter(file_ident::is_live.eq(true)) - .filter(file_ident::redirect_id.is_null()) - .first(conn)?, - (None, None, Some(sha256)) => file_ident::table - .inner_join(file_rev::table) - .filter(file_rev::sha256.eq(sha256)) - .filter(file_ident::is_live.eq(true)) - .filter(file_ident::redirect_id.is_null()) - .first(conn)?, + (Some(md5), None, None) => { + check_md5(md5)?; + file_ident::table + .inner_join(file_rev::table) + .filter(file_rev::md5.eq(md5)) + .filter(file_ident::is_live.eq(true)) + .filter(file_ident::redirect_id.is_null()) + .first(conn)? + }, + (None, Some(sha1), None) => { + check_sha1(sha1)?; + file_ident::table + .inner_join(file_rev::table) + .filter(file_rev::sha1.eq(sha1)) + .filter(file_ident::is_live.eq(true)) + .filter(file_ident::redirect_id.is_null()) + .first(conn)? + }, + (None, None, Some(sha256)) => { + check_sha256(sha256)?; + file_ident::table + .inner_join(file_rev::table) + .filter(file_rev::sha256.eq(sha256)) + .filter(file_ident::is_live.eq(true)) + .filter(file_ident::redirect_id.is_null()) + .first(conn)? + }, _ => { return Err(ErrorKind::MissingOrMultipleExternalId("in lookup".to_string()).into()); } diff --git a/rust/src/api_wrappers.rs b/rust/src/api_wrappers.rs index fe9cd793..c3df7d72 100644 --- a/rust/src/api_wrappers.rs +++ b/rust/src/api_wrappers.rs @@ -102,6 +102,8 @@ macro_rules! wrap_entity_handlers { message: ErrorKind::InvalidFatcatId(e).to_string() }), Err(Error(ErrorKind::MalformedExternalId(e), _)) => $post_resp::BadRequest(ErrorResponse { message: e.to_string() }), + Err(Error(ErrorKind::MalformedChecksum(e), _)) => + $post_resp::BadRequest(ErrorResponse { message: e.to_string() }), Err(Error(ErrorKind::NotInControlledVocabulary(e), _)) => $post_resp::BadRequest(ErrorResponse { message: e.to_string() }), Err(Error(ErrorKind::EditgroupAlreadyAccepted(e), _)) => @@ -141,6 +143,8 @@ macro_rules! wrap_entity_handlers { message: ErrorKind::InvalidFatcatId(e).to_string() }), Err(Error(ErrorKind::MalformedExternalId(e), _)) => $post_batch_resp::BadRequest(ErrorResponse { message: e.to_string() }), + Err(Error(ErrorKind::MalformedChecksum(e), _)) => + $post_batch_resp::BadRequest(ErrorResponse { message: e.to_string() }), Err(Error(ErrorKind::NotInControlledVocabulary(e), _)) => $post_batch_resp::BadRequest(ErrorResponse { message: e.to_string() }), Err(Error(ErrorKind::EditgroupAlreadyAccepted(e), _)) => @@ -185,6 +189,8 @@ macro_rules! wrap_entity_handlers { message: ErrorKind::InvalidFatcatId(e).to_string() }), Err(Error(ErrorKind::MalformedExternalId(e), _)) => $update_resp::BadRequest(ErrorResponse { message: e.to_string() }), + Err(Error(ErrorKind::MalformedChecksum(e), _)) => + $update_resp::BadRequest(ErrorResponse { message: e.to_string() }), Err(Error(ErrorKind::NotInControlledVocabulary(e), _)) => $update_resp::BadRequest(ErrorResponse { message: e.to_string() }), Err(Error(ErrorKind::EditgroupAlreadyAccepted(e), _)) => @@ -431,6 +437,8 @@ macro_rules! wrap_lookup_handler { $get_resp::NotFound(ErrorResponse { message: format!("Not found: {:?} / {:?}", $idname, wikidata_qid) }), Err(Error(ErrorKind::MalformedExternalId(e), _)) => $get_resp::BadRequest(ErrorResponse { message: e.to_string() }), + Err(Error(ErrorKind::MalformedChecksum(e), _)) => + $get_resp::BadRequest(ErrorResponse { message: e.to_string() }), Err(Error(ErrorKind::MissingOrMultipleExternalId(e), _)) => { $get_resp::BadRequest(ErrorResponse { message: e.to_string(), }) }, Err(Error(ErrorKind::OtherBadRequest(e), _)) => @@ -706,6 +714,11 @@ impl Api for Server { message: e.to_string(), }) } + Err(Error(ErrorKind::MalformedChecksum(e), _)) => { + LookupFileResponse::BadRequest(ErrorResponse { + message: e.to_string(), + }) + } Err(Error(ErrorKind::MissingOrMultipleExternalId(e), _)) => { LookupFileResponse::BadRequest(ErrorResponse { message: e.to_string(), diff --git a/rust/src/lib.rs b/rust/src/lib.rs index b7f1817a..0bed3471 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -49,7 +49,11 @@ pub mod errors { } MalformedExternalId(id: String) { description("external identifier doesn't match required pattern") - display("external identifier doesn't match required pattern") + display("external identifier doesn't match required pattern: {}", id) + } + MalformedChecksum(hash: String) { + description("checksum doesn't match required pattern (hex encoding)") + display("checksum doesn't match required pattern (hex encoding): {}", hash) } NotInControlledVocabulary(word: String) { description("word or type not correct for controlled vocabulary") |