From 0bc5118ebf944d1754409dc742552ed1b543346a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 10 May 2019 15:06:00 -0700 Subject: basic impl of extid changes --- rust/src/database_models.rs | 18 +++--- rust/src/database_schema.rs | 15 +++-- rust/src/endpoint_handlers.rs | 116 ++++++++++++++++++++++---------------- rust/src/endpoints.rs | 22 ++++---- rust/src/entity_crud.rs | 128 +++++++++++++++++++++++++++++++----------- 5 files changed, 191 insertions(+), 108 deletions(-) (limited to 'rust/src') diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs index 3ee7552c..60d6922c 100644 --- a/rust/src/database_models.rs +++ b/rust/src/database_models.rs @@ -389,10 +389,7 @@ pub struct ReleaseRevRow { pub pmid: Option, pub pmcid: Option, pub wikidata_qid: Option, - pub isbn13: Option, pub core_id: Option, - pub arxiv_id: Option, - pub jstor_id: Option, pub volume: Option, pub issue: Option, pub pages: Option, @@ -405,8 +402,6 @@ pub struct ReleaseRevRow { pub withdrawn_status: Option, pub withdrawn_date: Option, pub withdrawn_year: Option, - pub mag_id: Option, - pub ark_id: Option, } #[derive(Debug, Associations, AsChangeset, Insertable)] @@ -426,10 +421,7 @@ pub struct ReleaseRevNewRow { pub pmid: Option, pub pmcid: Option, pub wikidata_qid: Option, - pub isbn13: Option, pub core_id: Option, - pub arxiv_id: Option, - pub jstor_id: Option, pub volume: Option, pub issue: Option, pub pages: Option, @@ -442,8 +434,14 @@ pub struct ReleaseRevNewRow { pub withdrawn_status: Option, pub withdrawn_date: Option, pub withdrawn_year: Option, - pub mag_id: Option, - pub ark_id: Option, +} + +#[derive(Debug, Queryable, Associations, AsChangeset, Insertable)] +#[table_name = "release_rev_extid"] +pub struct ReleaseExtidRow { + pub release_rev: Uuid, + pub extid_type: String, + pub value: String, } entity_structs!( diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs index ae6a5464..46baba59 100644 --- a/rust/src/database_schema.rs +++ b/rust/src/database_schema.rs @@ -306,10 +306,7 @@ table! { pmid -> Nullable, pmcid -> Nullable, wikidata_qid -> Nullable, - isbn13 -> Nullable, core_id -> Nullable, - arxiv_id -> Nullable, - jstor_id -> Nullable, volume -> Nullable, issue -> Nullable, pages -> Nullable, @@ -322,8 +319,6 @@ table! { withdrawn_status -> Nullable, withdrawn_date -> Nullable, withdrawn_year -> Nullable, - mag_id -> Nullable, - ark_id -> Nullable, } } @@ -337,6 +332,14 @@ table! { } } +table! { + release_rev_extid (release_rev, extid_type) { + release_rev -> Uuid, + extid_type -> Text, + value -> Text, + } +} + table! { webcapture_edit (id) { id -> Uuid, @@ -459,6 +462,7 @@ joinable!(release_rev -> refs_blob (refs_blob_sha1)); joinable!(release_rev -> work_ident (work_ident_id)); joinable!(release_rev_abstract -> abstracts (abstract_sha1)); joinable!(release_rev_abstract -> release_rev (release_rev)); +joinable!(release_rev_extid -> release_rev (release_rev)); joinable!(webcapture_edit -> editgroup (editgroup_id)); joinable!(webcapture_ident -> webcapture_rev (rev_id)); joinable!(webcapture_rev_cdx -> webcapture_rev (webcapture_rev)); @@ -499,6 +503,7 @@ allow_tables_to_appear_in_same_query!( release_ref, release_rev, release_rev_abstract, + release_rev_extid, webcapture_edit, webcapture_ident, webcapture_rev, diff --git a/rust/src/endpoint_handlers.rs b/rust/src/endpoint_handlers.rs index cd2f1afa..ab3b81ce 100644 --- a/rust/src/endpoint_handlers.rs +++ b/rust/src/endpoint_handlers.rs @@ -260,11 +260,11 @@ impl Server { isbn13: &Option, pmid: &Option, pmcid: &Option, - core_id: &Option, - arxiv_id: &Option, - jstor_id: &Option, - ark_id: &Option, - mag_id: &Option, + core: &Option, + arxiv: &Option, + jstor: &Option, + ark: &Option, + mag: &Option, expand_flags: ExpandFlags, hide_flags: HideFlags, ) -> Result { @@ -274,11 +274,11 @@ impl Server { isbn13, pmid, pmcid, - core_id, - arxiv_id, - jstor_id, - ark_id, - mag_id, + core, + arxiv, + jstor, + ark, + mag, ) { (Some(doi), None, None, None, None, None, None, None, None, None) => { // DOIs always stored lower-case; lookups are case-insensitive @@ -302,12 +302,16 @@ impl Server { } (None, None, Some(isbn13), None, None, None, None, None, None, None) => { // TODO: check_isbn13(isbn13)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::isbn13.eq(isbn13)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("isbn13".to_string())) + .filter(release_rev_extid::value.eq(isbn13)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } (None, None, None, Some(pmid), None, None, None, None, None, None) => { check_pmid(pmid)?; @@ -327,50 +331,66 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, Some(core_id), None, None, None, None) => { - // TODO: check_core_id(core_id)?; + (None, None, None, None, None, Some(core), None, None, None, None) => { + // TODO: check_core_id(core)?; release_ident::table .inner_join(release_rev::table) - .filter(release_rev::core_id.eq(core_id)) + .filter(release_rev::core_id.eq(core)) .filter(release_ident::is_live.eq(true)) .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, None, Some(arxiv_id), None, None, None) => { + (None, None, None, None, None, None, Some(arxiv), None, None, None) => { // TODO: check_arxiv_id(arxiv_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::arxiv_id.eq(arxiv_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("arxiv".to_string())) + .filter(release_rev_extid::value.eq(arxiv)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } - (None, None, None, None, None, None, None, Some(jstor_id), None, None) => { + (None, None, None, None, None, None, None, Some(jstor), None, None) => { // TODO: check_jstor_id(jstor_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::jstor_id.eq(jstor_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("jstor".to_string())) + .filter(release_rev_extid::value.eq(jstor)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } - (None, None, None, None, None, None, None, None, Some(ark_id), None) => { + (None, None, None, None, None, None, None, None, Some(ark), None) => { // TODO: check_ark_id(ark_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::ark_id.eq(ark_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("ark".to_string())) + .filter(release_rev_extid::value.eq(ark)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } - (None, None, None, None, None, None, None, None, None, Some(mag_id)) => { - // TODO: check_ark_id(ark_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::mag_id.eq(mag_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + (None, None, None, None, None, None, None, None, None, Some(mag)) => { + // TODO: check_mag_id(mag_id)?; + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("mag".to_string())) + .filter(release_rev_extid::value.eq(mag)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } _ => { return Err( diff --git a/rust/src/endpoints.rs b/rust/src/endpoints.rs index d13a760e..4817184f 100644 --- a/rust/src/endpoints.rs +++ b/rust/src/endpoints.rs @@ -670,11 +670,11 @@ impl Api for Server { isbn13: Option, pmid: Option, pmcid: Option, - core_id: Option, - arxiv_id: Option, - jstor_id: Option, - ark_id: Option, - mag_id: Option, + core: Option, + arxiv: Option, + jstor: Option, + ark: Option, + mag: Option, expand: Option, hide: Option, _context: &Context, @@ -697,11 +697,11 @@ impl Api for Server { &isbn13, &pmid, &pmcid, - &core_id, - &arxiv_id, - &jstor_id, - &ark_id, - &mag_id, + &core, + &arxiv, + &jstor, + &ark, + &mag, expand_flags, hide_flags, ) @@ -709,7 +709,7 @@ impl Api for Server { { Ok(entity) => LookupReleaseResponse::FoundEntity(entity), // TODO: ensure good 'Not Found" error message here - // (was: "Not found: {:?} / {:?} / {:?} / {:?} / {:?} / {:?}", doi, wikidata_qid, isbn13, pmid, pmcid, core_id + // (was: "Not found: {:?} / {:?} / {:?} / {:?} / {:?} / {:?}", doi, wikidata_qid, isbn13, pmid, pmcid, core Err(fe) => generic_err_responses!(fe, LookupReleaseResponse), }; Box::new(futures::done(Ok(ret))) diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs index d141e838..60503671 100644 --- a/rust/src/entity_crud.rs +++ b/rust/src/entity_crud.rs @@ -1627,16 +1627,6 @@ impl EntityCrud for ReleaseEntity { withdrawn_status: None, withdrawn_date: None, withdrawn_year: None, - doi: None, - pmid: None, - pmcid: None, - isbn13: None, - wikidata_qid: None, - core_id: None, - arxiv_id: None, - jstor_id: None, - ark_id: None, - mag_id: None, volume: None, issue: None, pages: None, @@ -1651,6 +1641,18 @@ impl EntityCrud for ReleaseEntity { language: None, license_slug: None, work_id: None, + ext_ids: ReleaseEntityExtIds { + doi: None, + pmid: None, + pmcid: None, + isbn13: None, + wikidata_qid: None, + core: None, + arxiv: None, + jstor: None, + ark: None, + mag: None, + }, refs: None, contribs: None, abstracts: None, @@ -1916,6 +1918,33 @@ impl EntityCrud for ReleaseEntity { ) }; + let mut ext_ids = ReleaseEntityExtIds { + doi: rev_row.doi, + pmid: rev_row.pmid, + pmcid: rev_row.pmcid, + wikidata_qid: rev_row.wikidata_qid, + core: rev_row.core_id, + isbn13: None, + arxiv: None, + jstor: None, + ark: None, + mag: None, + }; + + let extid_rows: Vec = release_rev_extid::table + .filter(release_rev_extid::release_rev.eq(rev_row.id)) + .get_results(conn)?; + for extid_row in extid_rows { + match extid_row.extid_type.as_ref() { + "isbn13" => ext_ids.isbn13 = Some(extid_row.value), + "arxiv" => ext_ids.arxiv = Some(extid_row.value), + "jstor" => ext_ids.jstor = Some(extid_row.value), + "ark" => ext_ids.ark = Some(extid_row.value), + "mag" => ext_ids.mag = Some(extid_row.value), + _ => (), + } + } + Ok(ReleaseEntity { title: Some(rev_row.title), subtitle: rev_row.subtitle, @@ -1927,16 +1956,7 @@ impl EntityCrud for ReleaseEntity { withdrawn_status: rev_row.withdrawn_status, withdrawn_date: rev_row.withdrawn_date, withdrawn_year: rev_row.withdrawn_year, - doi: rev_row.doi, - pmid: rev_row.pmid, - pmcid: rev_row.pmcid, - isbn13: rev_row.isbn13, - wikidata_qid: rev_row.wikidata_qid, - core_id: rev_row.core_id, - arxiv_id: rev_row.arxiv_id, - jstor_id: rev_row.jstor_id, - ark_id: rev_row.ark_id, - mag_id: rev_row.mag_id, + ext_ids: ext_ids, volume: rev_row.volume, issue: rev_row.issue, pages: rev_row.pages, @@ -1968,16 +1988,16 @@ impl EntityCrud for ReleaseEntity { fn db_insert_revs(conn: &DbConn, models: &[&Self]) -> Result> { // first verify external identifier syntax for entity in models { - if let Some(ref extid) = entity.doi { + if let Some(ref extid) = entity.ext_ids.doi { check_doi(extid)?; } - if let Some(ref extid) = entity.pmid { + if let Some(ref extid) = entity.ext_ids.pmid { check_pmid(extid)?; } - if let Some(ref extid) = entity.pmcid { + if let Some(ref extid) = entity.ext_ids.pmcid { check_pmcid(extid)?; } - if let Some(ref extid) = entity.wikidata_qid { + if let Some(ref extid) = entity.ext_ids.wikidata_qid { check_wikidata_qid(extid)?; } // TODO: JSTOR and arxiv IDs @@ -2092,16 +2112,11 @@ impl EntityCrud for ReleaseEntity { withdrawn_status: model.withdrawn_status.clone(), withdrawn_date: model.withdrawn_date, withdrawn_year: model.withdrawn_year, - doi: model.doi.clone(), - pmid: model.pmid.clone(), - pmcid: model.pmcid.clone(), - wikidata_qid: model.wikidata_qid.clone(), - isbn13: model.isbn13.clone(), - core_id: model.core_id.clone(), - arxiv_id: model.arxiv_id.clone(), - jstor_id: model.jstor_id.clone(), - ark_id: model.ark_id.clone(), - mag_id: model.mag_id.clone(), + doi: model.ext_ids.doi.clone(), + pmid: model.ext_ids.pmid.clone(), + pmcid: model.ext_ids.pmcid.clone(), + wikidata_qid: model.ext_ids.wikidata_qid.clone(), + core_id: model.ext_ids.core.clone(), volume: model.volume.clone(), issue: model.issue.clone(), pages: model.pages.clone(), @@ -2126,11 +2141,50 @@ impl EntityCrud for ReleaseEntity { .returning(release_rev::id) .get_results(conn)?; + let mut release_extid_rows: Vec = vec![]; let mut release_ref_rows: Vec = vec![]; let mut release_contrib_rows: Vec = vec![]; let mut abstract_rows: Vec = vec![]; let mut release_abstract_rows: Vec = vec![]; + for (model, rev_id) in models.iter().zip(rev_ids.iter()) { + if let Some(extid) = &model.ext_ids.isbn13 { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "isbn13".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.arxiv { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "arxiv".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.jstor { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "jstor".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.ark { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "ark".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.mag { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "mag".to_string(), + value: extid.clone(), + }); + }; + } + for (model, rev_id) in models.iter().zip(rev_ids.iter()) { // We didn't know the release_rev id to insert here, so need to re-iterate over refs match &model.refs { @@ -2216,6 +2270,12 @@ impl EntityCrud for ReleaseEntity { } // can't insert more than 65k rows at a time, so take chunks + for release_extid_batch in release_extid_rows.chunks(2000) { + insert_into(release_rev_extid::table) + .values(release_extid_batch) + .execute(conn)?; + } + for release_ref_batch in release_ref_rows.chunks(2000) { insert_into(release_ref::table) .values(release_ref_batch) -- cgit v1.2.3