summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-12-21 17:09:10 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2018-12-21 17:09:10 -0800
commit c1c01caf6b40343bd876e10961829e2b15a9c37c (patch)
tree cb26627fe194cbd878ad9c5912dce2feef2c1c8d
parent 86f37a5df6f94d7736be736d0a440ae65425d6c9 (diff)
download fatcat-c1c01caf6b40343bd876e10961829e2b15a9c37c.tar.gz
fatcat-c1c01caf6b40343bd876e10961829e2b15a9c37c.zip
verify checksum against regexes
-rw-r--r-- rust/src/api_entity_crud.rs 13
-rw-r--r-- rust/src/api_helpers.rs 73
-rw-r--r-- rust/src/api_server.rs 45
-rw-r--r-- rust/src/api_wrappers.rs 13
-rw-r--r-- rust/src/lib.rs 6
-rw-r--r-- rust/tests/test_api_server_http.rs 13
6 files changed, 141 insertions, 22 deletions
diff --git a/rust/src/api_entity_crud.rs b/rust/src/api_entity_crud.rs
index 2f28e858..792e6f9a 100644
--- a/rust/src/api_entity_crud.rs
+++ b/rust/src/api_entity_crud.rs
@@ -885,6 +885,19 @@ impl EntityCrud for FileEntity {
}
fn db_insert_revs(conn: &DbConn, models: &[&Self]) -> Result<Vec<Uuid>> {
+ // first verify hash syntax
+ for entity in models {
+ if let Some(ref hash) = entity.md5 {
+ check_md5(hash)?;
+ }
+ if let Some(ref hash) = entity.sha1 {
+ check_sha1(hash)?;
+ }
+ if let Some(ref hash) = entity.sha256 {
+ check_sha256(hash)?;
+ }
+ }
+
let rev_ids: Vec<Uuid> = insert_into(file_rev::table)
.values(
models
diff --git a/rust/src/api_helpers.rs b/rust/src/api_helpers.rs
index b6525546..32750836 100644
--- a/rust/src/api_helpers.rs
+++ b/rust/src/api_helpers.rs
@@ -391,6 +391,78 @@ fn test_check_orcid() {
assert!(check_orcid("0x23-4567-3456-6780").is_err());
}
+pub fn check_md5(raw: &str) -> Result<()> {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r"^[a-f0-9]{32}$").unwrap();
+ }
+ if RE.is_match(raw) {
+ Ok(())
+ } else {
+ Err(ErrorKind::MalformedChecksum(format!(
+ "not a valid MD5: '{}' (expected lower-case hex, eg, '1b39813549077b2347c0f370c3864b40')",
+ raw
+ ))
+ .into())
+ }
+}
+
+#[test]
+fn test_check_md5() {
+ assert!(check_md5("1b39813549077b2347c0f370c3864b40").is_ok());
+ assert!(check_md5("1g39813549077b2347c0f370c3864b40").is_err());
+ assert!(check_md5("1B39813549077B2347C0F370c3864b40").is_err());
+ assert!(check_md5("1b39813549077b2347c0f370c3864b4").is_err());
+ assert!(check_md5("1b39813549077b2347c0f370c3864b411").is_err());
+}
+
+pub fn check_sha1(raw: &str) -> Result<()> {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r"^[a-f0-9]{40}$").unwrap();
+ }
+ if RE.is_match(raw) {
+ Ok(())
+ } else {
+ Err(ErrorKind::MalformedChecksum(format!(
+ "not a valid SHA-1: '{}' (expected lower-case hex, eg, 'e9dd75237c94b209dc3ccd52722de6931a310ba3')",
+ raw
+ ))
+ .into())
+ }
+}
+
+#[test]
+fn test_check_sha1() {
+ assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba3").is_ok());
+ assert!(check_sha1("g9dd75237c94b209dc3ccd52722de6931a310ba3").is_err());
+ assert!(check_sha1("e9DD75237C94B209DC3CCD52722de6931a310ba3").is_err());
+ assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba").is_err());
+ assert!(check_sha1("e9dd75237c94b209dc3ccd52722de6931a310ba33").is_err());
+}
+
+pub fn check_sha256(raw: &str) -> Result<()> {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r"^[a-f0-9]{64}$").unwrap();
+ }
+ if RE.is_match(raw) {
+ Ok(())
+ } else {
+ Err(ErrorKind::MalformedChecksum(format!(
+ "not a valid SHA-256: '{}' (expected lower-case hex, eg, 'cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452')",
+ raw
+ ))
+ .into())
+ }
+}
+
+#[test]
+fn test_check_sha256() {
+ assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok());
+ assert!(check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err());
+ assert!(check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err());
+ assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err());
+ assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err());
+}
+
pub fn check_release_type(raw: &str) -> Result<()> {
let valid_types = vec![
// Citation Style Language official types
@@ -499,4 +571,3 @@ fn test_check_contrib_role() {
// TODO: make the above checks "more correct"
// TODO: check ISBN-13
-// TODO: check hashes (SHA-1, etc)
diff --git a/rust/src/api_server.rs b/rust/src/api_server.rs
index d03fce07..adc6ab11 100644
--- a/rust/src/api_server.rs
+++ b/rust/src/api_server.rs
@@ -178,24 +178,33 @@ impl Server {
conn: &DbConn,
) -> Result<FileEntity> {
let (ident, rev): (FileIdentRow, FileRevRow) = match (md5, sha1, sha256) {
- (Some(md5), None, None) => file_ident::table
- .inner_join(file_rev::table)
- .filter(file_rev::md5.eq(md5))
- .filter(file_ident::is_live.eq(true))
- .filter(file_ident::redirect_id.is_null())
- .first(conn)?,
- (None, Some(sha1), None) => file_ident::table
- .inner_join(file_rev::table)
- .filter(file_rev::sha1.eq(sha1))
- .filter(file_ident::is_live.eq(true))
- .filter(file_ident::redirect_id.is_null())
- .first(conn)?,
- (None, None, Some(sha256)) => file_ident::table
- .inner_join(file_rev::table)
- .filter(file_rev::sha256.eq(sha256))
- .filter(file_ident::is_live.eq(true))
- .filter(file_ident::redirect_id.is_null())
- .first(conn)?,
+ (Some(md5), None, None) => {
+ check_md5(md5)?;
+ file_ident::table
+ .inner_join(file_rev::table)
+ .filter(file_rev::md5.eq(md5))
+ .filter(file_ident::is_live.eq(true))
+ .filter(file_ident::redirect_id.is_null())
+ .first(conn)?
+ },
+ (None, Some(sha1), None) => {
+ check_sha1(sha1)?;
+ file_ident::table
+ .inner_join(file_rev::table)
+ .filter(file_rev::sha1.eq(sha1))
+ .filter(file_ident::is_live.eq(true))
+ .filter(file_ident::redirect_id.is_null())
+ .first(conn)?
+ },
+ (None, None, Some(sha256)) => {
+ check_sha256(sha256)?;
+ file_ident::table
+ .inner_join(file_rev::table)
+ .filter(file_rev::sha256.eq(sha256))
+ .filter(file_ident::is_live.eq(true))
+ .filter(file_ident::redirect_id.is_null())
+ .first(conn)?
+ },
_ => {
return Err(ErrorKind::MissingOrMultipleExternalId("in lookup".to_string()).into());
}
diff --git a/rust/src/api_wrappers.rs b/rust/src/api_wrappers.rs
index fe9cd793..c3df7d72 100644
--- a/rust/src/api_wrappers.rs
+++ b/rust/src/api_wrappers.rs
@@ -102,6 +102,8 @@ macro_rules! wrap_entity_handlers {
message: ErrorKind::InvalidFatcatId(e).to_string() }),
Err(Error(ErrorKind::MalformedExternalId(e), _)) =>
$post_resp::BadRequest(ErrorResponse { message: e.to_string() }),
+ Err(Error(ErrorKind::MalformedChecksum(e), _)) =>
+ $post_resp::BadRequest(ErrorResponse { message: e.to_string() }),
Err(Error(ErrorKind::NotInControlledVocabulary(e), _)) =>
$post_resp::BadRequest(ErrorResponse { message: e.to_string() }),
Err(Error(ErrorKind::EditgroupAlreadyAccepted(e), _)) =>
@@ -141,6 +143,8 @@ macro_rules! wrap_entity_handlers {
message: ErrorKind::InvalidFatcatId(e).to_string() }),
Err(Error(ErrorKind::MalformedExternalId(e), _)) =>
$post_batch_resp::BadRequest(ErrorResponse { message: e.to_string() }),
+ Err(Error(ErrorKind::MalformedChecksum(e), _)) =>
+ $post_batch_resp::BadRequest(ErrorResponse { message: e.to_string() }),
Err(Error(ErrorKind::NotInControlledVocabulary(e), _)) =>
$post_batch_resp::BadRequest(ErrorResponse { message: e.to_string() }),
Err(Error(ErrorKind::EditgroupAlreadyAccepted(e), _)) =>
@@ -185,6 +189,8 @@ macro_rules! wrap_entity_handlers {
message: ErrorKind::InvalidFatcatId(e).to_string() }),
Err(Error(ErrorKind::MalformedExternalId(e), _)) =>
$update_resp::BadRequest(ErrorResponse { message: e.to_string() }),
+ Err(Error(ErrorKind::MalformedChecksum(e), _)) =>
+ $update_resp::BadRequest(ErrorResponse { message: e.to_string() }),
Err(Error(ErrorKind::NotInControlledVocabulary(e), _)) =>
$update_resp::BadRequest(ErrorResponse { message: e.to_string() }),
Err(Error(ErrorKind::EditgroupAlreadyAccepted(e), _)) =>
@@ -431,6 +437,8 @@ macro_rules! wrap_lookup_handler {
$get_resp::NotFound(ErrorResponse { message: format!("Not found: {:?} / {:?}", $idname, wikidata_qid) }),
Err(Error(ErrorKind::MalformedExternalId(e), _)) =>
$get_resp::BadRequest(ErrorResponse { message: e.to_string() }),
+ Err(Error(ErrorKind::MalformedChecksum(e), _)) =>
+ $get_resp::BadRequest(ErrorResponse { message: e.to_string() }),
Err(Error(ErrorKind::MissingOrMultipleExternalId(e), _)) => {
$get_resp::BadRequest(ErrorResponse { message: e.to_string(), }) },
Err(Error(ErrorKind::OtherBadRequest(e), _)) =>
@@ -706,6 +714,11 @@ impl Api for Server {
message: e.to_string(),
})
}
+ Err(Error(ErrorKind::MalformedChecksum(e), _)) => {
+ LookupFileResponse::BadRequest(ErrorResponse {
+ message: e.to_string(),
+ })
+ }
Err(Error(ErrorKind::MissingOrMultipleExternalId(e), _)) => {
LookupFileResponse::BadRequest(ErrorResponse {
message: e.to_string(),
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index b7f1817a..0bed3471 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -49,7 +49,11 @@ pub mod errors {
}
MalformedExternalId(id: String) {
description("external identifier doesn't match required pattern")
- display("external identifier doesn't match required pattern")
+ display("external identifier doesn't match required pattern: {}", id)
+ }
+ MalformedChecksum(hash: String) {
+ description("checksum doesn't match required pattern (hex encoding)")
+ display("checksum doesn't match required pattern (hex encoding): {}", hash)
}
NotInControlledVocabulary(word: String) {
description("word or type not correct for controlled vocabulary")
diff --git a/rust/tests/test_api_server_http.rs b/rust/tests/test_api_server_http.rs
index 031a0abf..714cfc68 100644
--- a/rust/tests/test_api_server_http.rs
+++ b/rust/tests/test_api_server_http.rs
@@ -267,7 +267,7 @@ fn test_lookups() {
check_http_response(
request::get(
- "http://localhost:9411/v0/file/lookup?md5=7d97e98f8af710c7e7fe703abc8f639e0ee507c4",
+ "http://localhost:9411/v0/file/lookup?md5=00000000000ab9fdc2a128f962faebff",
headers.clone(),
&router,
),
@@ -276,6 +276,15 @@ fn test_lookups() {
);
check_http_response(
request::get(
+ "http://localhost:9411/v0/file/lookup?md5=00000000000ab9fdc2a128f962faebfff",
+ headers.clone(),
+ &router,
+ ),
+ status::BadRequest,
+ None,
+ );
+ check_http_response(
+ request::get(
"http://localhost:9411/v0/file/lookup?md5=f4de91152c7ab9fdc2a128f962faebff",
headers.clone(),
&router,
@@ -304,7 +313,7 @@ fn test_lookups() {
check_http_response(
request::get(
- "http://localhost:9411/v0/file/lookup?sha1=7d97e98f8af710c7e7fe703abc8f000000000000",
+ "http://localhost:9411/v0/file/lookup?sha1=00000000000000c7e7fe703abc8f639e0ee507c4",
headers.clone(),
&router,
),