diff options
-rw-r--r-- | extra/elasticsearch/release_schema.json | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 17 | ||||
-rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 6 | ||||
-rw-r--r-- | python/fatcat_tools/transforms.py | 4 | ||||
-rw-r--r-- | python/fatcat_web/templates/creator_view.html | 5 | ||||
-rw-r--r-- | python/fatcat_web/templates/release_search.html | 4 | ||||
-rw-r--r-- | python/fatcat_web/templates/release_view.html | 6 | ||||
-rw-r--r-- | python/fatcat_web/templates/work_view.html | 4 | ||||
-rw-r--r-- | rust/migrations/2018-05-12-001226_init/up.sql | 9 | ||||
-rw-r--r-- | rust/src/api_entity_crud.rs | 34 | ||||
-rw-r--r-- | rust/src/api_helpers.rs | 22 | ||||
-rw-r--r-- | rust/src/api_server.rs | 6 | ||||
-rw-r--r-- | rust/src/database_models.rs | 2 | ||||
-rw-r--r-- | rust/src/database_schema.rs | 1 | ||||
-rw-r--r-- | rust/tests/test_api_server_http.rs | 76 |
15 files changed, 160 insertions, 38 deletions
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index 25478b1b..c9b77301 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -33,10 +33,12 @@ "author": { "type": "alias", "path": "contrib_names" }, "journal": { "type": "alias", "path": "container_name" }, "date": { "type": "alias", "path": "release_date" }, + "year": { "type": "alias", "path": "release_year" }, "issn": { "type": "alias", "path": "container_issnl" }, "oa": { "type": "alias", "path": "container_is_oa" }, "longtail": { "type": "alias", "path": "container_is_longtail_oa" }, "release_date": { "type": "date" }, + "release_year": { "type": "integer" }, "release_type": { "type": "keyword" }, "release_status": { "type": "keyword" }, "language": { "type": "keyword" }, diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 1ea47707..13179207 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -250,16 +250,18 @@ class CrossrefImporter(FatcatImporter): return None # release date parsing is amazingly complex - release_date = obj['issued']['date-parts'][0] - if not release_date or not release_date[0]: + raw_date = obj['issued']['date-parts'][0] + if not raw_date or not raw_date[0]: # got some NoneType, even though at least year is supposed to be set + release_year = None release_date = None - elif len(release_date) == 3: - release_date = datetime.date(year=release_date[0], month=release_date[1], day=release_date[2]) + elif len(raw_date) == 3: + release_year = raw_date[0] + release_date = datetime.date(year=raw_date[0], month=raw_date[1], day=raw_date[2]) else: - # only the year is actually required; mangle to first day for date - # (TODO: something better?) - release_date = datetime.date(year=release_date[0], month=1, day=1) + # sometimes only the year is included, not the full date + release_year = raw_date[0] + release_date = None re = fatcat_client.ReleaseEntity( work_id=None, @@ -277,6 +279,7 @@ class CrossrefImporter(FatcatImporter): pmcid=extids['pmcid'], wikidata_qid=extids['wikidata_qid'], release_date=release_date, + release_year=release_year, issue=obj.get('issue'), volume=obj.get('volume'), pages=obj.get('page'), diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index b84f7145..47a753a6 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -66,9 +66,10 @@ class GrobidMetadataImporter(FatcatImporter): refs.append(ref) release_date = None + release_year = None if obj.get('date'): - # TODO: only returns year, ever? how to handle? - release_date = datetime.datetime(year=int(obj['date'][:4]), month=1, day=1).date() + # only returns year, ever? + release_year = int(obj['date'][:4]) if obj.get('doi'): extra['doi'] = obj['doi'] @@ -88,6 +89,7 @@ class GrobidMetadataImporter(FatcatImporter): title=obj['title'].strip(), release_type="article-journal", release_date=release_date, + release_year=release_year, contribs=contribs, refs=refs, publisher=obj['journal'].get('publisher'), diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py index 516b68ae..843c00a5 100644 --- a/python/fatcat_tools/transforms.py +++ b/python/fatcat_tools/transforms.py @@ -48,6 +48,10 @@ def release_to_elasticsearch(release): if release.release_date: # .isoformat() results in, eg, '2010-10-22' (YYYY-MM-DD) t['release_date'] = release.release_date.isoformat() + if release.release_year is None: + t['release_year'] = release.release_date.year + if release.release_year is not None: + t['release_year'] = release.release_year container = release.container container_is_kept = False diff --git a/python/fatcat_web/templates/creator_view.html b/python/fatcat_web/templates/creator_view.html index 2ce01fb6..802ca4c9 100644 --- a/python/fatcat_web/templates/creator_view.html +++ b/python/fatcat_web/templates/creator_view.html @@ -34,8 +34,9 @@ <p>This creator has contributed to: <ul> {% for release in releases %} - <li>"{{ release.title }}", a {{ release.release_type }} published {{ release.release_date }} - {% if release.release_status != None %}(status: <code>{{ release.release_status }})</code>{% endif %}. + <li>"{{ release.title }}", a {{ release.release_type }} + {% if release.release_year != None %}published in {{ release.release_year }}{% endif %} + {% if release.release_status != None %}(status: <code>{{ release.release_status or "(unknown)" }})</code>{% endif %}. <br>Fatcat ID: <a href="/release/{{ release.ident }}"><code>{{ release.ident }}</code></a> {% endfor %} </ul> diff --git a/python/fatcat_web/templates/release_search.html b/python/fatcat_web/templates/release_search.html index 18bda117..c6a6df60 100644 --- a/python/fatcat_web/templates/release_search.html +++ b/python/fatcat_web/templates/release_search.html @@ -31,8 +31,8 @@ {% if paper.doi %} DOI: <a href="https://doi.org/{{paper.doi }}" style="color: green;">{{ paper.doi }}</a> - {{ paper.release_type }} - {% if paper.release_date %} - - {{ paper.release_date[:4] }} + {% if paper.release_year %} + - {{ paper.release_year }} {% endif %} <br> {% endif %} diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index 85492eba..fd86b7c9 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -22,7 +22,11 @@ <div class="one wide column"></div> <div class="ten wide column" style="font-size: 16px;"> -{% if release.release_date != None %}<p><b>Date (published):</b> {{ release.release_date }}{% endif %} +{% if release.release_date != None %} + <p><b>Date (published):</b> {{ release.release_date }} +{% elif release.release_year != None %} + <p><b>Date (published):</b> {{ release.release_year }} +{% endif %} {% if release.pmid != None %} <br><b>PubMed:</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.pmid }}"> <code>{{ release.pmid }}</code></a> {% endif %} diff --git a/python/fatcat_web/templates/work_view.html b/python/fatcat_web/templates/work_view.html index 87120e63..c767e1ba 100644 --- a/python/fatcat_web/templates/work_view.html +++ b/python/fatcat_web/templates/work_view.html @@ -34,7 +34,9 @@ still reference the same underlying "work". {% if releases != [] %} <ul> {% for release in releases %} - <li>"{{ release.title }}", a {{ release.release_type }} published {{ release.release_date }} as <code>{{ release.release_status }}</code>. + <li>"{{ release.title }}", a {{ release.release_type }} published + {% if release.release_year != None %}in {{ release.release_year }}{% endif %} + as <code>{{ release.release_status or "(unknown)" }}</code>. <br><a href="/release/{{ release.ident }}"><code>{{ release.ident }}</code></a> {% endfor %} </ul> diff --git a/rust/migrations/2018-05-12-001226_init/up.sql b/rust/migrations/2018-05-12-001226_init/up.sql index 7754b328..c842295e 100644 --- a/rust/migrations/2018-05-12-001226_init/up.sql +++ b/rust/migrations/2018-05-12-001226_init/up.sql @@ -196,6 +196,7 @@ CREATE TABLE release_rev ( release_type TEXT, -- TODO: enum release_status TEXT, -- TODO: enum release_date DATE, + release_year BIGINT, doi TEXT, -- CHECK for length limit for data quality pmid TEXT CHECK(octet_length(pmid) <= 12), @@ -430,10 +431,10 @@ INSERT INTO work_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VA ('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null), ('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-5555-FFF000000002'); -INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES - ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001', null, 'example title', null, null, null, null, null, null, null, null, null, null, null, null, null, null), - ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001', 'bigger example', 'article-journal', null, '2018-01-01', '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'), - ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', '10.1371/journal.pmed.0020124', null, null, null, null, null, '2', '8', 'e124', 'Public Library of Science', 'en'); +INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, release_year, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES + ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001', null, 'example title', null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001', 'bigger example', 'article-journal', null, '2018-01-01', 2018, '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'), + ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', 2005, '10.1371/journal.pmed.0020124', null, null, null, null, null, '2', '8', 'e124', 'Public Library of Science', 'en'); INSERT INTO release_ident (id, is_live, rev_id, redirect_id) VALUES ('00000000-0000-0000-4444-000000000001', true, '00000000-0000-0000-4444-FFF000000001', null), -- aaaaaaaaaaaaarceaaaaaaaaae diff --git a/rust/src/api_entity_crud.rs b/rust/src/api_entity_crud.rs index 792e6f9a..ee2d4ef3 100644 --- a/rust/src/api_entity_crud.rs +++ b/rust/src/api_entity_crud.rs @@ -166,7 +166,9 @@ macro_rules! generic_db_create_batch { ) -> Result<Vec<Self::EditRow>> { if models.iter().any(|m| m.redirect.is_some()) { return Err(ErrorKind::OtherBadRequest( - "can't create an entity that redirects from the start".to_string()).into()); + "can't create an entity that redirects from the start".to_string(), + ) + .into()); } let rev_ids: Vec<Uuid> = Self::db_insert_revs(conn, models)?; let ident_ids: Vec<Uuid> = insert_into($ident_table::table) @@ -658,7 +660,9 @@ impl EntityCrud for ContainerEntity { if models.iter().any(|m| m.name.is_none()) { return Err(ErrorKind::OtherBadRequest( - "name is required for all Container entities".to_string()).into()); + "name is required for all Container entities".to_string(), + ) + .into()); } let rev_ids: Vec<Uuid> = insert_into(container_rev::table) @@ -767,14 +771,16 @@ impl EntityCrud for CreatorEntity { if models.iter().any(|m| m.display_name.is_none()) { return Err(ErrorKind::OtherBadRequest( - "display_name is required for all Creator entities".to_string()).into()); + "display_name is required for all Creator entities".to_string(), + ) + .into()); } let rev_ids: Vec<Uuid> = insert_into(creator_rev::table) .values( models .iter() - .map(|model|CreatorRevNewRow { + .map(|model| CreatorRevNewRow { display_name: model.display_name.clone().unwrap(), // unwrapped checked above given_name: model.given_name.clone(), surname: model.surname.clone(), @@ -996,6 +1002,7 @@ impl EntityCrud for ReleaseEntity { release_type: None, release_status: None, release_date: None, + release_year: None, doi: None, pmid: None, pmcid: None, @@ -1029,7 +1036,7 @@ impl EntityCrud for ReleaseEntity { fn db_expand(&mut self, conn: &DbConn, expand: ExpandFlags) -> Result<()> { // Don't expand deleted entities if self.state == Some("deleted".to_string()) { - return Ok(()) + return Ok(()); } // TODO: should clarify behavior here. Would hit this path, eg, expanding files on a // release revision (not ident). Should we fail (Bad Request), or silently just not include @@ -1062,7 +1069,8 @@ impl EntityCrud for ReleaseEntity { contrib.creator = Some(CreatorEntity::db_get( conn, FatCatId::from_str(creator_id)?, - HideFlags::none())?); + HideFlags::none(), + )?); } } } @@ -1073,7 +1081,9 @@ impl EntityCrud for ReleaseEntity { fn db_create(&self, conn: &DbConn, edit_context: &EditContext) -> Result<Self::EditRow> { if self.redirect.is_some() { return Err(ErrorKind::OtherBadRequest( - "can't create an entity that redirects from the start".to_string()).into()); + "can't create an entity that redirects from the start".to_string(), + ) + .into()); } let mut edits = Self::db_create_batch(conn, edit_context, &[self])?; // probably a more elegant way to destroy the vec and take first element @@ -1089,7 +1099,9 @@ impl EntityCrud for ReleaseEntity { // of the release entities passed (at least in the common case) if models.iter().any(|m| m.redirect.is_some()) { return Err(ErrorKind::OtherBadRequest( - "can't create an entity that redirects from the start".to_string()).into()); + "can't create an entity that redirects from the start".to_string(), + ) + .into()); } // Generate the set of new work entities to insert (usually one for each release, but some @@ -1252,6 +1264,7 @@ impl EntityCrud for ReleaseEntity { release_type: rev_row.release_type, release_status: rev_row.release_status, release_date: rev_row.release_date, + release_year: rev_row.release_year, doi: rev_row.doi, pmid: rev_row.pmid, pmcid: rev_row.pmcid, @@ -1310,7 +1323,9 @@ impl EntityCrud for ReleaseEntity { if models.iter().any(|m| m.title.is_none()) { return Err(ErrorKind::OtherBadRequest( - "title is required for all Release entities".to_string()).into()); + "title is required for all Release entities".to_string(), + ) + .into()); } let rev_ids: Vec<Uuid> = insert_into(release_rev::table) @@ -1323,6 +1338,7 @@ impl EntityCrud for ReleaseEntity { release_type: model.release_type.clone(), release_status: model.release_status.clone(), release_date: model.release_date, + release_year: model.release_year, doi: model.doi.clone(), pmid: model.pmid.clone(), pmcid: model.pmcid.clone(), diff --git a/rust/src/api_helpers.rs b/rust/src/api_helpers.rs index 32750836..b837dfc2 100644 --- a/rust/src/api_helpers.rs +++ b/rust/src/api_helpers.rs @@ -22,7 +22,6 @@ pub struct EditContext { } impl EditContext { - /// This function should always be run within a transaction pub fn check(&self, conn: &DbConn) -> Result<()> { let count: i64 = changelog::table @@ -36,7 +35,6 @@ impl EditContext { } } - #[derive(Clone, Copy, PartialEq)] pub struct ExpandFlags { pub files: bool, @@ -456,11 +454,21 @@ pub fn check_sha256(raw: &str) -> Result<()> { #[test] fn test_check_sha256() { - assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok()); - assert!(check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()); - assert!(check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()); - assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err()); - assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err()); + assert!( + check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok() + ); + assert!( + check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err() + ); + assert!( + check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err() + ); + assert!( + check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err() + ); + assert!( + check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err() + ); } pub fn check_release_type(raw: &str) -> Result<()> { diff --git a/rust/src/api_server.rs b/rust/src/api_server.rs index adc6ab11..d06de9c1 100644 --- a/rust/src/api_server.rs +++ b/rust/src/api_server.rs @@ -186,7 +186,7 @@ impl Server { .filter(file_ident::is_live.eq(true)) .filter(file_ident::redirect_id.is_null()) .first(conn)? - }, + } (None, Some(sha1), None) => { check_sha1(sha1)?; file_ident::table @@ -195,7 +195,7 @@ impl Server { .filter(file_ident::is_live.eq(true)) .filter(file_ident::redirect_id.is_null()) .first(conn)? - }, + } (None, None, Some(sha256)) => { check_sha256(sha256)?; file_ident::table @@ -204,7 +204,7 @@ impl Server { .filter(file_ident::is_live.eq(true)) .filter(file_ident::redirect_id.is_null()) .first(conn)? - }, + } _ => { return Err(ErrorKind::MissingOrMultipleExternalId("in lookup".to_string()).into()); } diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs index d5daf9a4..617b150b 100644 --- a/rust/src/database_models.rs +++ b/rust/src/database_models.rs @@ -247,6 +247,7 @@ pub struct ReleaseRevRow { pub release_type: Option<String>, pub release_status: Option<String>, pub release_date: Option<chrono::NaiveDate>, + pub release_year: Option<i64>, pub doi: Option<String>, pub pmid: Option<String>, pub pmcid: Option<String>, @@ -270,6 +271,7 @@ pub struct ReleaseRevNewRow { pub release_type: Option<String>, pub release_status: Option<String>, pub release_date: Option<chrono::NaiveDate>, + pub release_year: Option<i64>, pub doi: Option<String>, pub pmid: Option<String>, pub pmcid: Option<String>, diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs index 829a21b8..6c1fb929 100644 --- a/rust/src/database_schema.rs +++ b/rust/src/database_schema.rs @@ -211,6 +211,7 @@ table! { release_type -> Nullable<Text>, release_status -> Nullable<Text>, release_date -> Nullable<Date>, + release_year -> Nullable<Int8>, doi -> Nullable<Text>, pmid -> Nullable<Text>, pmcid -> Nullable<Text>, diff --git a/rust/tests/test_api_server_http.rs b/rust/tests/test_api_server_http.rs index 714cfc68..d6cdb6d3 100644 --- a/rust/tests/test_api_server_http.rs +++ b/rust/tests/test_api_server_http.rs @@ -586,6 +586,7 @@ fn test_post_release() { r#"{"title": "secret paper", "release_type": "article-journal", "release_date": "2000-01-02", + "release_year": 2000, "doi": "10.1234/abcde.781231231239", "pmid": "54321", "pmcid": "PMC12345", @@ -1230,6 +1231,81 @@ fn test_release_dates() { None, ); + // Ok + check_http_response( + request::post( + "http://localhost:9411/v0/release", + headers.clone(), + r#"{"title": "secret minimal paper", + "release_type": "article-journal", + "release_year": 2000 + }"#, + &router, + ), + status::Created, + None, + ); + + // Ok; ISO 8601 + check_http_response( + request::post( + "http://localhost:9411/v0/release", + headers.clone(), + r#"{"title": "secret minimal paper", + "release_type": "article-journal", + "release_year": -100 + }"#, + &router, + ), + status::Created, + None, + ); + check_http_response( + request::post( + "http://localhost:9411/v0/release", + headers.clone(), + r#"{"title": "secret minimal paper", + "release_type": "article-journal", + "release_year": 0 + }"#, + &router, + ), + status::Created, + None, + ); + + // Ok + check_http_response( + request::post( + "http://localhost:9411/v0/release", + headers.clone(), + r#"{"title": "secret minimal paper", + "release_type": "article-journal", + "release_date": "2000-01-02", + "release_year": 2000 + }"#, + &router, + ), + status::Created, + None, + ); + + // Ok for now, but may be excluded later + check_http_response( + request::post( + "http://localhost:9411/v0/release", + headers.clone(), + r#"{"title": "secret minimal paper", + "release_type": "article-journal", + "release_date": "2000-01-02", + "release_year": 1999 + }"#, + &router, + ), + status::Created, + None, + ); + // Bad: year/month only check_http_response( request::post( |