diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-10 15:06:00 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-10 15:06:00 -0700 |
commit | 0bc5118ebf944d1754409dc742552ed1b543346a (patch) | |
tree | d81327741ee0f351a759617c1110abfde996d8a8 | |
parent | 781b0d792c80f3394227a8e60043ff1cbf753ff4 (diff) | |
download | fatcat-0bc5118ebf944d1754409dc742552ed1b543346a.tar.gz fatcat-0bc5118ebf944d1754409dc742552ed1b543346a.zip |
basic impl of extid changes
-rw-r--r-- | rust/src/database_models.rs | 18 | ||||
-rw-r--r-- | rust/src/database_schema.rs | 15 | ||||
-rw-r--r-- | rust/src/endpoint_handlers.rs | 116 | ||||
-rw-r--r-- | rust/src/endpoints.rs | 22 | ||||
-rw-r--r-- | rust/src/entity_crud.rs | 128 |
5 files changed, 191 insertions, 108 deletions
diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs index 3ee7552c..60d6922c 100644 --- a/rust/src/database_models.rs +++ b/rust/src/database_models.rs @@ -389,10 +389,7 @@ pub struct ReleaseRevRow { pub pmid: Option<String>, pub pmcid: Option<String>, pub wikidata_qid: Option<String>, - pub isbn13: Option<String>, pub core_id: Option<String>, - pub arxiv_id: Option<String>, - pub jstor_id: Option<String>, pub volume: Option<String>, pub issue: Option<String>, pub pages: Option<String>, @@ -405,8 +402,6 @@ pub struct ReleaseRevRow { pub withdrawn_status: Option<String>, pub withdrawn_date: Option<chrono::NaiveDate>, pub withdrawn_year: Option<i64>, - pub mag_id: Option<String>, - pub ark_id: Option<String>, } #[derive(Debug, Associations, AsChangeset, Insertable)] @@ -426,10 +421,7 @@ pub struct ReleaseRevNewRow { pub pmid: Option<String>, pub pmcid: Option<String>, pub wikidata_qid: Option<String>, - pub isbn13: Option<String>, pub core_id: Option<String>, - pub arxiv_id: Option<String>, - pub jstor_id: Option<String>, pub volume: Option<String>, pub issue: Option<String>, pub pages: Option<String>, @@ -442,8 +434,14 @@ pub struct ReleaseRevNewRow { pub withdrawn_status: Option<String>, pub withdrawn_date: Option<chrono::NaiveDate>, pub withdrawn_year: Option<i64>, - pub mag_id: Option<String>, - pub ark_id: Option<String>, +} + +#[derive(Debug, Queryable, Associations, AsChangeset, Insertable)] +#[table_name = "release_rev_extid"] +pub struct ReleaseExtidRow { + pub release_rev: Uuid, + pub extid_type: String, + pub value: String, } entity_structs!( diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs index ae6a5464..46baba59 100644 --- a/rust/src/database_schema.rs +++ b/rust/src/database_schema.rs @@ -306,10 +306,7 @@ table! { pmid -> Nullable<Text>, pmcid -> Nullable<Text>, wikidata_qid -> Nullable<Text>, - isbn13 -> Nullable<Text>, core_id -> Nullable<Text>, - arxiv_id -> Nullable<Text>, - jstor_id -> Nullable<Text>, volume -> Nullable<Text>, issue -> Nullable<Text>, pages -> Nullable<Text>, @@ -322,8 +319,6 @@ table! { withdrawn_status -> Nullable<Text>, withdrawn_date -> Nullable<Date>, withdrawn_year -> Nullable<Int8>, - mag_id -> Nullable<Text>, - ark_id -> Nullable<Text>, } } @@ -338,6 +333,14 @@ table! { } table! { + release_rev_extid (release_rev, extid_type) { + release_rev -> Uuid, + extid_type -> Text, + value -> Text, + } +} + +table! { webcapture_edit (id) { id -> Uuid, editgroup_id -> Uuid, @@ -459,6 +462,7 @@ joinable!(release_rev -> refs_blob (refs_blob_sha1)); joinable!(release_rev -> work_ident (work_ident_id)); joinable!(release_rev_abstract -> abstracts (abstract_sha1)); joinable!(release_rev_abstract -> release_rev (release_rev)); +joinable!(release_rev_extid -> release_rev (release_rev)); joinable!(webcapture_edit -> editgroup (editgroup_id)); joinable!(webcapture_ident -> webcapture_rev (rev_id)); joinable!(webcapture_rev_cdx -> webcapture_rev (webcapture_rev)); @@ -499,6 +503,7 @@ allow_tables_to_appear_in_same_query!( release_ref, release_rev, release_rev_abstract, + release_rev_extid, webcapture_edit, webcapture_ident, webcapture_rev, diff --git a/rust/src/endpoint_handlers.rs b/rust/src/endpoint_handlers.rs index cd2f1afa..ab3b81ce 100644 --- a/rust/src/endpoint_handlers.rs +++ b/rust/src/endpoint_handlers.rs @@ -260,11 +260,11 @@ impl Server { isbn13: &Option<String>, pmid: &Option<String>, pmcid: &Option<String>, - core_id: &Option<String>, - arxiv_id: &Option<String>, - jstor_id: &Option<String>, - ark_id: &Option<String>, - mag_id: &Option<String>, + core: &Option<String>, + arxiv: &Option<String>, + jstor: &Option<String>, + ark: &Option<String>, + mag: &Option<String>, expand_flags: ExpandFlags, hide_flags: HideFlags, ) -> Result<ReleaseEntity> { @@ -274,11 +274,11 @@ impl Server { isbn13, pmid, pmcid, - core_id, - arxiv_id, - jstor_id, - ark_id, - mag_id, + core, + arxiv, + jstor, + ark, + mag, ) { (Some(doi), None, None, None, None, None, None, None, None, None) => { // DOIs always stored lower-case; lookups are case-insensitive @@ -302,12 +302,16 @@ impl Server { } (None, None, Some(isbn13), None, None, None, None, None, None, None) => { // TODO: check_isbn13(isbn13)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::isbn13.eq(isbn13)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("isbn13".to_string())) + .filter(release_rev_extid::value.eq(isbn13)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } (None, None, None, Some(pmid), None, None, None, None, None, None) => { check_pmid(pmid)?; @@ -327,50 +331,66 @@ impl Server { .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, Some(core_id), None, None, None, None) => { - // TODO: check_core_id(core_id)?; + (None, None, None, None, None, Some(core), None, None, None, None) => { + // TODO: check_core_id(core)?; release_ident::table .inner_join(release_rev::table) - .filter(release_rev::core_id.eq(core_id)) + .filter(release_rev::core_id.eq(core)) .filter(release_ident::is_live.eq(true)) .filter(release_ident::redirect_id.is_null()) .first(conn)? } - (None, None, None, None, None, None, Some(arxiv_id), None, None, None) => { + (None, None, None, None, None, None, Some(arxiv), None, None, None) => { // TODO: check_arxiv_id(arxiv_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::arxiv_id.eq(arxiv_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("arxiv".to_string())) + .filter(release_rev_extid::value.eq(arxiv)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } - (None, None, None, None, None, None, None, Some(jstor_id), None, None) => { + (None, None, None, None, None, None, None, Some(jstor), None, None) => { // TODO: check_jstor_id(jstor_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::jstor_id.eq(jstor_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("jstor".to_string())) + .filter(release_rev_extid::value.eq(jstor)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } - (None, None, None, None, None, None, None, None, Some(ark_id), None) => { + (None, None, None, None, None, None, None, None, Some(ark), None) => { // TODO: check_ark_id(ark_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::ark_id.eq(ark_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("ark".to_string())) + .filter(release_rev_extid::value.eq(ark)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } - (None, None, None, None, None, None, None, None, None, Some(mag_id)) => { - // TODO: check_ark_id(ark_id)?; - release_ident::table - .inner_join(release_rev::table) - .filter(release_rev::mag_id.eq(mag_id)) - .filter(release_ident::is_live.eq(true)) - .filter(release_ident::redirect_id.is_null()) - .first(conn)? + (None, None, None, None, None, None, None, None, None, Some(mag)) => { + // TODO: check_mag_id(mag_id)?; + let (rev, ident, _extid): (ReleaseRevRow, ReleaseIdentRow, ReleaseExtidRow) = + release_rev::table + .inner_join(release_ident::table) + .inner_join(release_rev_extid::table) + .filter(release_rev_extid::extid_type.eq("mag".to_string())) + .filter(release_rev_extid::value.eq(mag)) + .filter(release_ident::is_live.eq(true)) + .filter(release_ident::redirect_id.is_null()) + .first(conn)?; + (ident, rev) } _ => { return Err( diff --git a/rust/src/endpoints.rs b/rust/src/endpoints.rs index d13a760e..4817184f 100644 --- a/rust/src/endpoints.rs +++ b/rust/src/endpoints.rs @@ -670,11 +670,11 @@ impl Api for Server { isbn13: Option<String>, pmid: Option<String>, pmcid: Option<String>, - core_id: Option<String>, - arxiv_id: Option<String>, - jstor_id: Option<String>, - ark_id: Option<String>, - mag_id: Option<String>, + core: Option<String>, + arxiv: Option<String>, + jstor: Option<String>, + ark: Option<String>, + mag: Option<String>, expand: Option<String>, hide: Option<String>, _context: &Context, @@ -697,11 +697,11 @@ impl Api for Server { &isbn13, &pmid, &pmcid, - &core_id, - &arxiv_id, - &jstor_id, - &ark_id, - &mag_id, + &core, + &arxiv, + &jstor, + &ark, + &mag, expand_flags, hide_flags, ) @@ -709,7 +709,7 @@ impl Api for Server { { Ok(entity) => LookupReleaseResponse::FoundEntity(entity), // TODO: ensure good 'Not Found" error message here - // (was: "Not found: {:?} / {:?} / {:?} / {:?} / {:?} / {:?}", doi, wikidata_qid, isbn13, pmid, pmcid, core_id + // (was: "Not found: {:?} / {:?} / {:?} / {:?} / {:?} / {:?}", doi, wikidata_qid, isbn13, pmid, pmcid, core Err(fe) => generic_err_responses!(fe, LookupReleaseResponse), }; Box::new(futures::done(Ok(ret))) diff --git a/rust/src/entity_crud.rs b/rust/src/entity_crud.rs index d141e838..60503671 100644 --- a/rust/src/entity_crud.rs +++ b/rust/src/entity_crud.rs @@ -1627,16 +1627,6 @@ impl EntityCrud for ReleaseEntity { withdrawn_status: None, withdrawn_date: None, withdrawn_year: None, - doi: None, - pmid: None, - pmcid: None, - isbn13: None, - wikidata_qid: None, - core_id: None, - arxiv_id: None, - jstor_id: None, - ark_id: None, - mag_id: None, volume: None, issue: None, pages: None, @@ -1651,6 +1641,18 @@ impl EntityCrud for ReleaseEntity { language: None, license_slug: None, work_id: None, + ext_ids: ReleaseEntityExtIds { + doi: None, + pmid: None, + pmcid: None, + isbn13: None, + wikidata_qid: None, + core: None, + arxiv: None, + jstor: None, + ark: None, + mag: None, + }, refs: None, contribs: None, abstracts: None, @@ -1916,6 +1918,33 @@ impl EntityCrud for ReleaseEntity { ) }; + let mut ext_ids = ReleaseEntityExtIds { + doi: rev_row.doi, + pmid: rev_row.pmid, + pmcid: rev_row.pmcid, + wikidata_qid: rev_row.wikidata_qid, + core: rev_row.core_id, + isbn13: None, + arxiv: None, + jstor: None, + ark: None, + mag: None, + }; + + let extid_rows: Vec<ReleaseExtidRow> = release_rev_extid::table + .filter(release_rev_extid::release_rev.eq(rev_row.id)) + .get_results(conn)?; + for extid_row in extid_rows { + match extid_row.extid_type.as_ref() { + "isbn13" => ext_ids.isbn13 = Some(extid_row.value), + "arxiv" => ext_ids.arxiv = Some(extid_row.value), + "jstor" => ext_ids.jstor = Some(extid_row.value), + "ark" => ext_ids.ark = Some(extid_row.value), + "mag" => ext_ids.mag = Some(extid_row.value), + _ => (), + } + } + Ok(ReleaseEntity { title: Some(rev_row.title), subtitle: rev_row.subtitle, @@ -1927,16 +1956,7 @@ impl EntityCrud for ReleaseEntity { withdrawn_status: rev_row.withdrawn_status, withdrawn_date: rev_row.withdrawn_date, withdrawn_year: rev_row.withdrawn_year, - doi: rev_row.doi, - pmid: rev_row.pmid, - pmcid: rev_row.pmcid, - isbn13: rev_row.isbn13, - wikidata_qid: rev_row.wikidata_qid, - core_id: rev_row.core_id, - arxiv_id: rev_row.arxiv_id, - jstor_id: rev_row.jstor_id, - ark_id: rev_row.ark_id, - mag_id: rev_row.mag_id, + ext_ids: ext_ids, volume: rev_row.volume, issue: rev_row.issue, pages: rev_row.pages, @@ -1968,16 +1988,16 @@ impl EntityCrud for ReleaseEntity { fn db_insert_revs(conn: &DbConn, models: &[&Self]) -> Result<Vec<Uuid>> { // first verify external identifier syntax for entity in models { - if let Some(ref extid) = entity.doi { + if let Some(ref extid) = entity.ext_ids.doi { check_doi(extid)?; } - if let Some(ref extid) = entity.pmid { + if let Some(ref extid) = entity.ext_ids.pmid { check_pmid(extid)?; } - if let Some(ref extid) = entity.pmcid { + if let Some(ref extid) = entity.ext_ids.pmcid { check_pmcid(extid)?; } - if let Some(ref extid) = entity.wikidata_qid { + if let Some(ref extid) = entity.ext_ids.wikidata_qid { check_wikidata_qid(extid)?; } // TODO: JSTOR and arxiv IDs @@ -2092,16 +2112,11 @@ impl EntityCrud for ReleaseEntity { withdrawn_status: model.withdrawn_status.clone(), withdrawn_date: model.withdrawn_date, withdrawn_year: model.withdrawn_year, - doi: model.doi.clone(), - pmid: model.pmid.clone(), - pmcid: model.pmcid.clone(), - wikidata_qid: model.wikidata_qid.clone(), - isbn13: model.isbn13.clone(), - core_id: model.core_id.clone(), - arxiv_id: model.arxiv_id.clone(), - jstor_id: model.jstor_id.clone(), - ark_id: model.ark_id.clone(), - mag_id: model.mag_id.clone(), + doi: model.ext_ids.doi.clone(), + pmid: model.ext_ids.pmid.clone(), + pmcid: model.ext_ids.pmcid.clone(), + wikidata_qid: model.ext_ids.wikidata_qid.clone(), + core_id: model.ext_ids.core.clone(), volume: model.volume.clone(), issue: model.issue.clone(), pages: model.pages.clone(), @@ -2126,12 +2141,51 @@ impl EntityCrud for ReleaseEntity { .returning(release_rev::id) .get_results(conn)?; + let mut release_extid_rows: Vec<ReleaseExtidRow> = vec![]; let mut release_ref_rows: Vec<ReleaseRefRow> = vec![]; let mut release_contrib_rows: Vec<ReleaseContribNewRow> = vec![]; let mut abstract_rows: Vec<AbstractsRow> = vec![]; let mut release_abstract_rows: Vec<ReleaseRevAbstractNewRow> = vec![]; for (model, rev_id) in models.iter().zip(rev_ids.iter()) { + if let Some(extid) = &model.ext_ids.isbn13 { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "isbn13".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.arxiv { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "arxiv".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.jstor { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "jstor".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.ark { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "ark".to_string(), + value: extid.clone(), + }); + }; + if let Some(extid) = &model.ext_ids.mag { + release_extid_rows.push(ReleaseExtidRow { + release_rev: *rev_id, + extid_type: "mag".to_string(), + value: extid.clone(), + }); + }; + } + + for (model, rev_id) in models.iter().zip(rev_ids.iter()) { // We didn't know the release_rev id to insert here, so need to re-iterate over refs match &model.refs { None => (), @@ -2216,6 +2270,12 @@ impl EntityCrud for ReleaseEntity { } // can't insert more than 65k rows at a time, so take chunks + for release_extid_batch in release_extid_rows.chunks(2000) { + insert_into(release_rev_extid::table) + .values(release_extid_batch) + .execute(conn)?; + } + for release_ref_batch in release_ref_rows.chunks(2000) { insert_into(release_ref::table) .values(release_ref_batch) |