about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-11-19 13:15:24 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-11-19 14:55:15 -0800
commit: 7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a (patch)
tree: af898a5374fcecb03d9c4783c3cd025c1237a37c
parent: 40b5d40bed9e20e5c7a40d1741601a8317e078d8 (diff)
download: fatcat-7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a.tar.gz
download: fatcat-7fa96bab1bb4d1a99048cf9398fd3e1c8a4bf78a.zip
update fatcatd rust code for 'oai' external identifier
-rw-r--r-- rust/src/endpoint_handlers.rs 150
-rw-r--r-- rust/src/endpoints.rs 2
-rw-r--r-- rust/src/entity_crud.rs 10
-rw-r--r-- rust/src/identifiers.rs 38
4 files changed, 189 insertions, 11 deletions
diff --git a/rust/src/endpoint_handlers.rs b/rust/src/endpoint_handlers.rs
index 1b7bd0b6..91ea2393 100644
--- a/rust/src/endpoint_handlers.rs
+++ b/rust/src/endpoint_handlers.rs
@@ -263,6 +263,7 @@ impl Server {
mag: &Option<String>,
doaj: &Option<String>,
dblp: &Option<String>,
+ oai: &Option<String>,
expand_flags: ExpandFlags,
hide_flags: HideFlags,
) -> Result<ReleaseEntity> {
@@ -279,8 +280,9 @@ impl Server {
mag,
doaj,
dblp,
+ oai,
) {
- (Some(doi), None, None, None, None, None, None, None, None, None, None, None) => {
+ (Some(doi), None, None, None, None, None, None, None, None, None, None, None, None) => {
// DOIs always stored lower-case; lookups are case-insensitive
let doi = doi.to_lowercase();
check_doi(&doi)?;
@@ -304,6 +306,7 @@ impl Server {
None,
None,
None,
+ None,
) => {
check_wikidata_qid(wikidata_qid)?;
release_ident::table
@@ -313,7 +316,21 @@ impl Server {
.filter(release_ident::redirect_id.is_null())
.first(conn)?
}
- (None, None, Some(isbn13), None, None, None, None, None, None, None, None, None) => {
+ (
+ None,
+ None,
+ Some(isbn13),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ ) => {
check_isbn13(isbn13)?;
let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) =
release_rev::table
@@ -326,7 +343,21 @@ impl Server {
.first(conn)?;
(ident, rev)
}
- (None, None, None, Some(pmid), None, None, None, None, None, None, None, None) => {
+ (
+ None,
+ None,
+ None,
+ Some(pmid),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ ) => {
check_pmid(pmid)?;
release_ident::table
.inner_join(release_rev::table)
@@ -335,7 +366,21 @@ impl Server {
.filter(release_ident::redirect_id.is_null())
.first(conn)?
}
- (None, None, None, None, Some(pmcid), None, None, None, None, None, None, None) => {
+ (
+ None,
+ None,
+ None,
+ None,
+ Some(pmcid),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ ) => {
check_pmcid(pmcid)?;
release_ident::table
.inner_join(release_rev::table)
@@ -344,7 +389,21 @@ impl Server {
.filter(release_ident::redirect_id.is_null())
.first(conn)?
}
- (None, None, None, None, None, Some(core), None, None, None, None, None, None) => {
+ (
+ None,
+ None,
+ None,
+ None,
+ None,
+ Some(core),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ ) => {
check_core_id(core)?;
release_ident::table
.inner_join(release_rev::table)
@@ -353,7 +412,21 @@ impl Server {
.filter(release_ident::redirect_id.is_null())
.first(conn)?
}
- (None, None, None, None, None, None, Some(arxiv), None, None, None, None, None) => {
+ (
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ Some(arxiv),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ ) => {
// TODO: this allows only lookup by full, versioned arxiv identifier. Probably also
// want to allow lookup by "work" style identifier?
check_arxiv_id(arxiv)?;
@@ -368,7 +441,21 @@ impl Server {
.first(conn)?;
(ident, rev)
}
- (None, None, None, None, None, None, None, Some(jstor), None, None, None, None) => {
+ (
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ Some(jstor),
+ None,
+ None,
+ None,
+ None,
+ None,
+ ) => {
check_jstor_id(jstor)?;
let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) =
release_rev::table
@@ -381,7 +468,7 @@ impl Server {
.first(conn)?;
(ident, rev)
}
- (None, None, None, None, None, None, None, None, Some(ark), None, None, None) => {
+ (None, None, None, None, None, None, None, None, Some(ark), None, None, None, None) => {
check_ark_id(ark)?;
let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) =
release_rev::table
@@ -394,7 +481,7 @@ impl Server {
.first(conn)?;
(ident, rev)
}
- (None, None, None, None, None, None, None, None, None, Some(mag), None, None) => {
+ (None, None, None, None, None, None, None, None, None, Some(mag), None, None, None) => {
check_mag_id(mag)?;
let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) =
release_rev::table
@@ -407,7 +494,21 @@ impl Server {
.first(conn)?;
(ident, rev)
}
- (None, None, None, None, None, None, None, None, None, None, Some(doaj), None) => {
+ (
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ Some(doaj),
+ None,
+ None,
+ ) => {
check_doaj_id(doaj)?;
let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) =
release_rev::table
@@ -420,7 +521,21 @@ impl Server {
.first(conn)?;
(ident, rev)
}
- (None, None, None, None, None, None, None, None, None, None, None, Some(dblp)) => {
+ (
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ Some(dblp),
+ None,
+ ) => {
check_dblp_id(dblp)?;
let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) =
release_rev::table
@@ -433,6 +548,19 @@ impl Server {
.first(conn)?;
(ident, rev)
}
+ (None, None, None, None, None, None, None, None, None, None, None, None, Some(oai)) => {
+ check_oai_id(oai)?;
+ let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) =
+ release_rev::table
+ .inner_join(release_ident::table)
+ .inner_join(release_rev_extid::table)
+ .filter(release_rev_extid::extid_type.eq("oai".to_string()))
+ .filter(release_rev_extid::value.eq(oai))
+ .filter(release_ident::is_live.eq(true))
+ .filter(release_ident::redirect_id.is_null())
+ .first(conn)?;
+ (ident, rev)
+ }
_ => {
return Err(
FatcatError::MissingOrMultipleExternalId("in lookup".to_string()).into(),
diff --git a/rust/src/endpoints.rs b/rust/src/endpoints.rs
index 7ac0a068..0dd232c6 100644
--- a/rust/src/endpoints.rs
+++ b/rust/src/endpoints.rs
@@ -739,6 +739,7 @@ impl Api for Server {
mag: Option<String>,
doaj: Option<String>,
dblp: Option<String>,
+ oai: Option<String>,
expand: Option<String>,
hide: Option<String>,
_context: &Context,
@@ -768,6 +769,7 @@ impl Api for Server {
&mag,
&doaj,
&dblp,
+ &oai,
expand_flags,
hide_flags,
)
diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs
index bac8c0fc..0d72788d 100644
--- a/rust/src/entity_crud.rs
+++ b/rust/src/entity_crud.rs
@@ -1746,6 +1746,7 @@ impl EntityCrud for ReleaseEntity {
mag: None,
doaj: None,
dblp: None,
+ oai: None,
},
refs: None,
contribs: None,
@@ -2024,6 +2025,7 @@ impl EntityCrud for ReleaseEntity {
mag: None,
doaj: None,
dblp: None,
+ oai: None,
};
let extid_rows: Vec<ReleaseExtidRow> = release_rev_extid::table
@@ -2038,6 +2040,7 @@ impl EntityCrud for ReleaseEntity {
"mag" => ext_ids.mag = Some(extid_row.value),
"doaj" => ext_ids.doaj = Some(extid_row.value),
"dblp" => ext_ids.dblp = Some(extid_row.value),
+ "oai" => ext_ids.oai = Some(extid_row.value),
_ => (),
}
}
@@ -2312,6 +2315,13 @@ impl EntityCrud for ReleaseEntity {
value: extid.clone(),
});
};
+ if let Some(extid) = &model.ext_ids.oai {
+ release_extid_rows.push(ReleaseExtidRow {
+ release_rev: *rev_id,
+ extid_type: "oai".to_string(),
+ value: extid.clone(),
+ });
+ };
}
for (model, rev_id) in models.iter().zip(rev_ids.iter()) {
diff --git a/rust/src/identifiers.rs b/rust/src/identifiers.rs
index 22ffcc79..76f978f9 100644
--- a/rust/src/identifiers.rs
+++ b/rust/src/identifiers.rs
@@ -411,6 +411,44 @@ fn test_check_dblp_id() {
assert!(check_dblp_id("").is_err());
}
+/// Checks that `raw` is a syntactically well-formed OAI-PMH identifier.
+///
+/// The accepted shape is `oai:<namespace-identifier>:<local-identifier>`, eg
+/// `oai:foo.org:some-local-id-54`:
+///
+/// - the scheme must be the literal, lower-case `oai`;
+/// - the namespace identifier must look like a domain name: two or more
+///   dot-separated labels, each starting with an ASCII letter and continuing
+///   with ASCII letters, digits, or `-`;
+/// - the local identifier must be non-empty and use only the restricted set
+///   of ASCII characters listed in the pattern below (other characters, such
+///   as a space, have to be percent-escaped by the caller).
+///
+/// # Errors
+///
+/// Returns a `FatcatError::MalformedChecksum` error, carrying a description
+/// of the expected format plus the rejected input, when `raw` contains any
+/// non-ASCII character or does not match the pattern.
+pub fn check_oai_id(raw: &str) -> Result<()> {
+    lazy_static! {
+        // http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm
+        static ref RE: Regex = Regex::new(r"^oai:[a-zA-Z][a-zA-Z0-9\-]*(\.[a-zA-Z][a-zA-Z0-9\-]*)+:[a-zA-Z0-9\-_\.!~\*'\(\);/\?:@&=\+$,%]+$").unwrap();
+    }
+    if raw.is_ascii() && RE.is_match(raw) {
+        Ok(())
+    } else {
+        // NOTE(review): `MalformedChecksum` is a surprising variant for an
+        // external identifier check -- confirm whether a dedicated
+        // "malformed external id" variant should be used here instead.
+        //
+        // Build the error value directly (rather than `Err(..)?`) so the
+        // conversion into the function's error type is explicit.
+        Err(FatcatError::MalformedChecksum(
+            "OAI-PMH identifier (expected, eg, 'oai:foo.org:some-local-id-54')".to_string(),
+            raw.to_string(),
+        )
+        .into())
+    }
+}
+
+#[test]
+fn test_check_oai_id() {
+    // Inputs that must be rejected by `check_oai_id`.
+    let rejected = [
+        "journals/entcs/GoubaultM12",         // no `oai:` scheme at all
+        "10.123*",                            // no `oai:` scheme at all
+        "",                                   // empty input
+        "something:arXiv.org:hep-th/9901001", // bad scheme
+        "oai:999:abc123",    // namespace-identifier must not start with digit
+        "oai:wibble:abc123", // namespace-identifier must be domain name
+        "oai:wibble.org:ab cd", // space not permitted (must be escaped as %20)
+        "oai:wibble.org:ab#cd", // # not permitted
+        "oai:wibble.org:ab<cd", // < not permitted
+    ];
+    for raw in rejected.iter() {
+        assert!(check_oai_id(raw).is_err());
+    }
+    // Deliberately not in the rejected list: "oai:wibble.org:ab%3ccd". Strictly,
+    // `<` must be escaped as %3C rather than %3c, but the "official" regex used
+    // by `check_oai_id` allows the lower-case form.
+
+    // Inputs that must be accepted by `check_oai_id`.
+    let accepted = [
+        "oai:arXiv.org:hep-th/9901001",
+        "oai:foo.org:some-local-id-53",
+        "oai:FOO.ORG:some-local-id-53",
+        "oai:foo.org:some-local-id-54",
+        "oai:foo.org:Some-Local-Id-54",
+        "oai:wibble.org:ab%20cd",
+        "oai:wibble.org:ab?cd",
+    ];
+    for raw in accepted.iter() {
+        assert!(check_oai_id(raw).is_ok());
+    }
+}
+
pub fn check_issn(raw: &str) -> Result<()> {
lazy_static! {
static ref RE: Regex = Regex::new(r"^\d{4}-\d{3}[0-9X]$").unwrap();