diff options
| -rw-r--r-- | extra/elasticsearch/release_schema.json | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 17 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 6 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms.py | 4 | ||||
| -rw-r--r-- | python/fatcat_web/templates/creator_view.html | 5 | ||||
| -rw-r--r-- | python/fatcat_web/templates/release_search.html | 4 | ||||
| -rw-r--r-- | python/fatcat_web/templates/release_view.html | 6 | ||||
| -rw-r--r-- | python/fatcat_web/templates/work_view.html | 4 | ||||
| -rw-r--r-- | rust/migrations/2018-05-12-001226_init/up.sql | 9 | ||||
| -rw-r--r-- | rust/src/api_entity_crud.rs | 34 | ||||
| -rw-r--r-- | rust/src/api_helpers.rs | 22 | ||||
| -rw-r--r-- | rust/src/api_server.rs | 6 | ||||
| -rw-r--r-- | rust/src/database_models.rs | 2 | ||||
| -rw-r--r-- | rust/src/database_schema.rs | 1 | ||||
| -rw-r--r-- | rust/tests/test_api_server_http.rs | 76 | 
15 files changed, 160 insertions, 38 deletions
| diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json index 25478b1b..c9b77301 100644 --- a/extra/elasticsearch/release_schema.json +++ b/extra/elasticsearch/release_schema.json @@ -33,10 +33,12 @@              "author":         { "type": "alias", "path": "contrib_names" },              "journal":        { "type": "alias", "path": "container_name" },              "date":           { "type": "alias", "path": "release_date" }, +            "year":           { "type": "alias", "path": "release_year" },              "issn":           { "type": "alias", "path": "container_issnl" },              "oa":             { "type": "alias", "path": "container_is_oa" },              "longtail":       { "type": "alias", "path": "container_is_longtail_oa" },              "release_date":   { "type": "date" }, +            "release_year":   { "type": "integer" },              "release_type":   { "type": "keyword" },              "release_status": { "type": "keyword" },              "language": { "type": "keyword" }, diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 1ea47707..13179207 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -250,16 +250,18 @@ class CrossrefImporter(FatcatImporter):              return None          # release date parsing is amazingly complex -        release_date = obj['issued']['date-parts'][0] -        if not release_date or not release_date[0]: +        raw_date = obj['issued']['date-parts'][0] +        if not raw_date or not raw_date[0]:              # got some NoneType, even though at least year is supposed to be set +            release_year = None              release_date = None -        elif len(release_date) == 3: -            release_date = datetime.date(year=release_date[0], month=release_date[1], day=release_date[2]) +        elif len(raw_date) == 3: +            release_year = raw_date[0] +            release_date = datetime.date(year=raw_date[0], month=raw_date[1], day=raw_date[2])          else: -            # only the year is actually required; mangle to first day for date -            # (TODO: something better?) -            release_date = datetime.date(year=release_date[0], month=1, day=1) +            # sometimes only the year is included, not the full date +            release_year = raw_date[0] +            release_date = None          re = fatcat_client.ReleaseEntity(              work_id=None, @@ -277,6 +279,7 @@ class CrossrefImporter(FatcatImporter):              pmcid=extids['pmcid'],              wikidata_qid=extids['wikidata_qid'],              release_date=release_date, +            release_year=release_year,              issue=obj.get('issue'),              volume=obj.get('volume'),              pages=obj.get('page'), diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index b84f7145..47a753a6 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -66,9 +66,10 @@ class GrobidMetadataImporter(FatcatImporter):              refs.append(ref)          release_date = None +        release_year = None          if obj.get('date'): -            # TODO: only returns year, ever? how to handle? -            release_date = datetime.datetime(year=int(obj['date'][:4]), month=1, day=1).date() +            # only returns year, ever? +            release_year = int(obj['date'][:4])          if obj.get('doi'):              extra['doi'] = obj['doi'] @@ -88,6 +89,7 @@ class GrobidMetadataImporter(FatcatImporter):              title=obj['title'].strip(),              release_type="article-journal",              release_date=release_date, +            release_year=release_year,              contribs=contribs,              refs=refs,              publisher=obj['journal'].get('publisher'), diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py index 516b68ae..843c00a5 100644 --- a/python/fatcat_tools/transforms.py +++ b/python/fatcat_tools/transforms.py @@ -48,6 +48,10 @@ def release_to_elasticsearch(release):      if release.release_date:          # .isoformat() results in, eg, '2010-10-22' (YYYY-MM-DD)          t['release_date'] = release.release_date.isoformat() +        if release.release_year is None: +            t['release_year'] = release.release_date.year +    if release.release_year is not None: +        t['release_year'] = release.release_year      container = release.container      container_is_kept = False diff --git a/python/fatcat_web/templates/creator_view.html b/python/fatcat_web/templates/creator_view.html index 2ce01fb6..802ca4c9 100644 --- a/python/fatcat_web/templates/creator_view.html +++ b/python/fatcat_web/templates/creator_view.html @@ -34,8 +34,9 @@  <p>This creator has contributed to:  <ul>    {% for release in releases %} -  <li>"{{ release.title }}", a {{ release.release_type }} published {{ release.release_date }} -      {% if release.release_status != None %}(status: <code>{{ release.release_status }})</code>{% endif %}. +  <li>"{{ release.title }}", a {{ release.release_type }} +      {% if release.release_year != None %}published in {{ release.release_year }}{% endif %} +      {% if release.release_status != None %}(status: <code>{{ release.release_status or "(unknown)" }})</code>{% endif %}.      <br>Fatcat ID: <a href="/release/{{ release.ident }}"><code>{{ release.ident }}</code></a>    {% endfor %}  </ul> diff --git a/python/fatcat_web/templates/release_search.html b/python/fatcat_web/templates/release_search.html index 18bda117..c6a6df60 100644 --- a/python/fatcat_web/templates/release_search.html +++ b/python/fatcat_web/templates/release_search.html @@ -31,8 +31,8 @@    {% if paper.doi %}    DOI: <a href="https://doi.org/{{paper.doi }}" style="color: green;">{{ paper.doi }}</a>    - {{ paper.release_type }} -    {% if paper.release_date %} -    - {{ paper.release_date[:4] }} +    {% if paper.release_year %} +    - {{ paper.release_year }}      {% endif %}    <br>    {% endif %} diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index 85492eba..fd86b7c9 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -22,7 +22,11 @@  <div class="one wide column"></div>  <div class="ten wide column" style="font-size: 16px;"> -{% if release.release_date != None %}<p><b>Date (published):</b> {{ release.release_date }}{% endif %} +{% if release.release_date != None %} +  <p><b>Date (published):</b> {{ release.release_date }} +{% elif release.release_year != None %} +  <p><b>Date (published):</b> {{ release.release_year }} +{% endif %}  {% if release.pmid != None %}  <br><b>PubMed:</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.pmid }}"> <code>{{ release.pmid }}</code></a>  {% endif %} diff --git a/python/fatcat_web/templates/work_view.html b/python/fatcat_web/templates/work_view.html index 87120e63..c767e1ba 100644 --- a/python/fatcat_web/templates/work_view.html +++ b/python/fatcat_web/templates/work_view.html @@ -34,7 +34,9 @@ still reference the same underlying "work".  {% if releases != [] %}  <ul>    {% for release in releases %} -  <li>"{{ release.title }}", a {{ release.release_type }} published {{ release.release_date }} as <code>{{ release.release_status }}</code>. +  <li>"{{ release.title }}", a {{ release.release_type }} published +    {% if release.release_year != None %}in {{ release.release_year }}{% endif %} +    as <code>{{ release.release_status or "(unknown)" }}</code>.      <br><a href="/release/{{ release.ident }}"><code>{{ release.ident }}</code></a>    {% endfor %}  </ul> diff --git a/rust/migrations/2018-05-12-001226_init/up.sql b/rust/migrations/2018-05-12-001226_init/up.sql index 7754b328..c842295e 100644 --- a/rust/migrations/2018-05-12-001226_init/up.sql +++ b/rust/migrations/2018-05-12-001226_init/up.sql @@ -196,6 +196,7 @@ CREATE TABLE release_rev (      release_type        TEXT, -- TODO: enum      release_status      TEXT, -- TODO: enum      release_date        DATE, +    release_year        BIGINT,      doi                 TEXT,      -- CHECK for length limit for data quality      pmid                TEXT CHECK(octet_length(pmid) <= 12), @@ -430,10 +431,10 @@ INSERT INTO work_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VA      ('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),      ('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-5555-FFF000000002'); -INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES -    ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001',                                   null,                                  'example title',             null,         null,         null,                          null,      null,    null,     null,               null,       null, null, null,  null, null, null), -    ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001',                                 'bigger example', 'article-journal',        null, '2018-01-01',                   '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'), -    ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', '10.1371/journal.pmed.0020124',     null,    null,     null,               null,       null, '2', '8', 'e124', 'Public Library of Science', 'en'); +INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, release_year, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES +    ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001',                                   null,                                  'example title',             null,         null,         null, null,                           null,      null,    null,     null,               null,       null, null, null,  null, null, null), +    ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001',                                 'bigger example', 'article-journal',        null, '2018-01-01', 2018,                   '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'), +    ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', 2005, '10.1371/journal.pmed.0020124',     null,    null,     null,               null,       null, '2', '8', 'e124', 'Public Library of Science', 'en');  INSERT INTO release_ident (id, is_live, rev_id, redirect_id) VALUES      ('00000000-0000-0000-4444-000000000001', true, '00000000-0000-0000-4444-FFF000000001', null), -- aaaaaaaaaaaaarceaaaaaaaaae diff --git a/rust/src/api_entity_crud.rs b/rust/src/api_entity_crud.rs index 792e6f9a..ee2d4ef3 100644 --- a/rust/src/api_entity_crud.rs +++ b/rust/src/api_entity_crud.rs @@ -166,7 +166,9 @@ macro_rules! generic_db_create_batch {          ) -> Result<Vec<Self::EditRow>> {              if models.iter().any(|m| m.redirect.is_some()) {                  return Err(ErrorKind::OtherBadRequest( -                    "can't create an entity that redirects from the start".to_string()).into()); +                    "can't create an entity that redirects from the start".to_string(), +                ) +                .into());              }              let rev_ids: Vec<Uuid> = Self::db_insert_revs(conn, models)?;              let ident_ids: Vec<Uuid> = insert_into($ident_table::table) @@ -658,7 +660,9 @@ impl EntityCrud for ContainerEntity {          if models.iter().any(|m| m.name.is_none()) {              return Err(ErrorKind::OtherBadRequest( -                "name is required for all Container entities".to_string()).into()); +                "name is required for all Container entities".to_string(), +            ) +            .into());          }          let rev_ids: Vec<Uuid> = insert_into(container_rev::table) @@ -767,14 +771,16 @@ impl EntityCrud for CreatorEntity {          if models.iter().any(|m| m.display_name.is_none()) {              return Err(ErrorKind::OtherBadRequest( -                "display_name is required for all Creator entities".to_string()).into()); +                "display_name is required for all Creator entities".to_string(), +            ) +            .into());          }          let rev_ids: Vec<Uuid> = insert_into(creator_rev::table)              .values(                  models                      .iter() -                    .map(|model|CreatorRevNewRow { +                    .map(|model| CreatorRevNewRow {                          display_name: model.display_name.clone().unwrap(), // unwrapped checked above                          given_name: model.given_name.clone(),                          surname: model.surname.clone(), @@ -996,6 +1002,7 @@ impl EntityCrud for ReleaseEntity {              release_type: None,              release_status: None,              release_date: None, +            release_year: None,              doi: None,              pmid: None,              pmcid: None, @@ -1029,7 +1036,7 @@ impl EntityCrud for ReleaseEntity {      fn db_expand(&mut self, conn: &DbConn, expand: ExpandFlags) -> Result<()> {          // Don't expand deleted entities          if self.state == Some("deleted".to_string()) { -            return Ok(()) +            return Ok(());          }          // TODO: should clarify behavior here. Would hit this path, eg, expanding files on a          // release revision (not ident). Should we fail (Bad Request), or silently just not include @@ -1062,7 +1069,8 @@ impl EntityCrud for ReleaseEntity {                          contrib.creator = Some(CreatorEntity::db_get(                              conn,                              FatCatId::from_str(creator_id)?, -                            HideFlags::none())?); +                            HideFlags::none(), +                        )?);                      }                  }              } @@ -1073,7 +1081,9 @@ impl EntityCrud for ReleaseEntity {      fn db_create(&self, conn: &DbConn, edit_context: &EditContext) -> Result<Self::EditRow> {          if self.redirect.is_some() {              return Err(ErrorKind::OtherBadRequest( -                "can't create an entity that redirects from the start".to_string()).into()); +                "can't create an entity that redirects from the start".to_string(), +            ) +            .into());          }          let mut edits = Self::db_create_batch(conn, edit_context, &[self])?;          // probably a more elegant way to destroy the vec and take first element @@ -1089,7 +1099,9 @@ impl EntityCrud for ReleaseEntity {          // of the release entities passed (at least in the common case)          if models.iter().any(|m| m.redirect.is_some()) {              return Err(ErrorKind::OtherBadRequest( -                "can't create an entity that redirects from the start".to_string()).into()); +                "can't create an entity that redirects from the start".to_string(), +            ) +            .into());          }          // Generate the set of new work entities to insert (usually one for each release, but some @@ -1252,6 +1264,7 @@ impl EntityCrud for ReleaseEntity {              release_type: rev_row.release_type,              release_status: rev_row.release_status,              release_date: rev_row.release_date, +            release_year: rev_row.release_year,              doi: rev_row.doi,              pmid: rev_row.pmid,              pmcid: rev_row.pmcid, @@ -1310,7 +1323,9 @@ impl EntityCrud for ReleaseEntity {          if models.iter().any(|m| m.title.is_none()) {              return Err(ErrorKind::OtherBadRequest( -                "title is required for all Release entities".to_string()).into()); +                "title is required for all Release entities".to_string(), +            ) +            .into());          }          let rev_ids: Vec<Uuid> = insert_into(release_rev::table) @@ -1323,6 +1338,7 @@ impl EntityCrud for ReleaseEntity {                      release_type: model.release_type.clone(),                      release_status: model.release_status.clone(),                      release_date: model.release_date, +                    release_year: model.release_year,                      doi: model.doi.clone(),                      pmid: model.pmid.clone(),                      pmcid: model.pmcid.clone(), diff --git a/rust/src/api_helpers.rs b/rust/src/api_helpers.rs index 32750836..b837dfc2 100644 --- a/rust/src/api_helpers.rs +++ b/rust/src/api_helpers.rs @@ -22,7 +22,6 @@ pub struct EditContext {  }  impl EditContext { -      /// This function should always be run within a transaction      pub fn check(&self, conn: &DbConn) -> Result<()> {          let count: i64 = changelog::table @@ -36,7 +35,6 @@ impl EditContext {      }  } -  #[derive(Clone, Copy, PartialEq)]  pub struct ExpandFlags {      pub files: bool, @@ -456,11 +454,21 @@ pub fn check_sha256(raw: &str) -> Result<()> {  #[test]  fn test_check_sha256() { -    assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok()); -    assert!(check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()); -    assert!(check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()); -    assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err()); -    assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err()); +    assert!( +        check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok() +    ); +    assert!( +        check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err() +    ); +    assert!( +        check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err() +    ); +    assert!( +        check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err() +    ); +    assert!( +        check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err() +    );  }  pub fn check_release_type(raw: &str) -> Result<()> { diff --git a/rust/src/api_server.rs b/rust/src/api_server.rs index adc6ab11..d06de9c1 100644 --- a/rust/src/api_server.rs +++ b/rust/src/api_server.rs @@ -186,7 +186,7 @@ impl Server {                      .filter(file_ident::is_live.eq(true))                      .filter(file_ident::redirect_id.is_null())                      .first(conn)? -            }, +            }              (None, Some(sha1), None) => {                  check_sha1(sha1)?;                  file_ident::table @@ -195,7 +195,7 @@ impl Server {                      .filter(file_ident::is_live.eq(true))                      .filter(file_ident::redirect_id.is_null())                      .first(conn)? -            }, +            }              (None, None, Some(sha256)) => {                  check_sha256(sha256)?;                  file_ident::table @@ -204,7 +204,7 @@ impl Server {                      .filter(file_ident::is_live.eq(true))                      .filter(file_ident::redirect_id.is_null())                      .first(conn)? -            }, +            }              _ => {                  return Err(ErrorKind::MissingOrMultipleExternalId("in lookup".to_string()).into());              } diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs index d5daf9a4..617b150b 100644 --- a/rust/src/database_models.rs +++ b/rust/src/database_models.rs @@ -247,6 +247,7 @@ pub struct ReleaseRevRow {      pub release_type: Option<String>,      pub release_status: Option<String>,      pub release_date: Option<chrono::NaiveDate>, +    pub release_year: Option<i64>,      pub doi: Option<String>,      pub pmid: Option<String>,      pub pmcid: Option<String>, @@ -270,6 +271,7 @@ pub struct ReleaseRevNewRow {      pub release_type: Option<String>,      pub release_status: Option<String>,      pub release_date: Option<chrono::NaiveDate>, +    pub release_year: Option<i64>,      pub doi: Option<String>,      pub pmid: Option<String>,      pub pmcid: Option<String>, diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs index 829a21b8..6c1fb929 100644 --- a/rust/src/database_schema.rs +++ b/rust/src/database_schema.rs @@ -211,6 +211,7 @@ table! {          release_type -> Nullable<Text>,          release_status -> Nullable<Text>,          release_date -> Nullable<Date>, +        release_year -> Nullable<Int8>,          doi -> Nullable<Text>,          pmid -> Nullable<Text>,          pmcid -> Nullable<Text>, diff --git a/rust/tests/test_api_server_http.rs b/rust/tests/test_api_server_http.rs index 714cfc68..d6cdb6d3 100644 --- a/rust/tests/test_api_server_http.rs +++ b/rust/tests/test_api_server_http.rs @@ -586,6 +586,7 @@ fn test_post_release() {              r#"{"title": "secret paper",                  "release_type": "article-journal",                  "release_date": "2000-01-02", +                "release_year": 2000,                  "doi": "10.1234/abcde.781231231239",                  "pmid": "54321",                  "pmcid": "PMC12345", @@ -1230,6 +1231,81 @@ fn test_release_dates() {          None,      ); +    // Ok +    check_http_response( +        request::post( +            "http://localhost:9411/v0/release", +            headers.clone(), +            r#"{"title": "secret minimal paper", +                "release_type": "article-journal", +                "release_year": 2000 +                }"#, +            &router, +        ), +        status::Created, +        None, +    ); + +    // Ok; ISO 8601 +    check_http_response( +        request::post( +            "http://localhost:9411/v0/release", +            headers.clone(), +            r#"{"title": "secret minimal paper", +                "release_type": "article-journal", +                "release_year": -100 +                }"#, +            &router, +        ), +        status::Created, +        None, +    ); +    check_http_response( +        request::post( +            "http://localhost:9411/v0/release", +            headers.clone(), +            r#"{"title": "secret minimal paper", +                "release_type": "article-journal", +                "release_year": 0 +                }"#, +            &router, +        ), +        status::Created, +        None, +    ); + +    // Ok +    check_http_response( +        request::post( +            "http://localhost:9411/v0/release", +            headers.clone(), +            r#"{"title": "secret minimal paper", +                "release_type": "article-journal", +                "release_date": "2000-01-02", +                "release_year": 2000 +                }"#, +            &router, +        ), +        status::Created, +        None, +    ); + +    // Ok for now, but may be excluded later +    check_http_response( +        request::post( +            "http://localhost:9411/v0/release", +            headers.clone(), +            r#"{"title": "secret minimal paper", +                "release_type": "article-journal", +                "release_date": "2000-01-02", +                "release_year": 1999 +                }"#, +            &router, +        ), +        status::Created, +        None, +    ); +      // Bad: year/month only      check_http_response(          request::post( | 
