about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- extra/elasticsearch/release_schema.json 2
-rw-r--r-- python/fatcat_tools/importers/crossref.py 17
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 6
-rw-r--r-- python/fatcat_tools/transforms.py 4
-rw-r--r-- python/fatcat_web/templates/creator_view.html 5
-rw-r--r-- python/fatcat_web/templates/release_search.html 4
-rw-r--r-- python/fatcat_web/templates/release_view.html 6
-rw-r--r-- python/fatcat_web/templates/work_view.html 4
-rw-r--r-- rust/migrations/2018-05-12-001226_init/up.sql 9
-rw-r--r-- rust/src/api_entity_crud.rs 34
-rw-r--r-- rust/src/api_helpers.rs 22
-rw-r--r-- rust/src/api_server.rs 6
-rw-r--r-- rust/src/database_models.rs 2
-rw-r--r-- rust/src/database_schema.rs 1
-rw-r--r-- rust/tests/test_api_server_http.rs 76
15 files changed, 160 insertions, 38 deletions
diff --git a/extra/elasticsearch/release_schema.json b/extra/elasticsearch/release_schema.json
index 25478b1b..c9b77301 100644
--- a/extra/elasticsearch/release_schema.json
+++ b/extra/elasticsearch/release_schema.json
@@ -33,10 +33,12 @@
"author": { "type": "alias", "path": "contrib_names" },
"journal": { "type": "alias", "path": "container_name" },
"date": { "type": "alias", "path": "release_date" },
+ "year": { "type": "alias", "path": "release_year" },
"issn": { "type": "alias", "path": "container_issnl" },
"oa": { "type": "alias", "path": "container_is_oa" },
"longtail": { "type": "alias", "path": "container_is_longtail_oa" },
"release_date": { "type": "date" },
+ "release_year": { "type": "integer" },
"release_type": { "type": "keyword" },
"release_status": { "type": "keyword" },
"language": { "type": "keyword" },
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 1ea47707..13179207 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -250,16 +250,18 @@ class CrossrefImporter(FatcatImporter):
return None
# release date parsing is amazingly complex
- release_date = obj['issued']['date-parts'][0]
- if not release_date or not release_date[0]:
+ raw_date = obj['issued']['date-parts'][0]
+ if not raw_date or not raw_date[0]:
# got some NoneType, even though at least year is supposed to be set
+ release_year = None
release_date = None
- elif len(release_date) == 3:
- release_date = datetime.date(year=release_date[0], month=release_date[1], day=release_date[2])
+ elif len(raw_date) == 3:
+ release_year = raw_date[0]
+ release_date = datetime.date(year=raw_date[0], month=raw_date[1], day=raw_date[2])
else:
- # only the year is actually required; mangle to first day for date
- # (TODO: something better?)
- release_date = datetime.date(year=release_date[0], month=1, day=1)
+ # sometimes only the year is included, not the full date
+ release_year = raw_date[0]
+ release_date = None
re = fatcat_client.ReleaseEntity(
work_id=None,
@@ -277,6 +279,7 @@ class CrossrefImporter(FatcatImporter):
pmcid=extids['pmcid'],
wikidata_qid=extids['wikidata_qid'],
release_date=release_date,
+ release_year=release_year,
issue=obj.get('issue'),
volume=obj.get('volume'),
pages=obj.get('page'),
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index b84f7145..47a753a6 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -66,9 +66,10 @@ class GrobidMetadataImporter(FatcatImporter):
refs.append(ref)
release_date = None
+ release_year = None
if obj.get('date'):
- # TODO: only returns year, ever? how to handle?
- release_date = datetime.datetime(year=int(obj['date'][:4]), month=1, day=1).date()
+ # only returns year, ever?
+ release_year = int(obj['date'][:4])
if obj.get('doi'):
extra['doi'] = obj['doi']
@@ -88,6 +89,7 @@ class GrobidMetadataImporter(FatcatImporter):
title=obj['title'].strip(),
release_type="article-journal",
release_date=release_date,
+ release_year=release_year,
contribs=contribs,
refs=refs,
publisher=obj['journal'].get('publisher'),
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index 516b68ae..843c00a5 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -48,6 +48,10 @@ def release_to_elasticsearch(release):
if release.release_date:
# .isoformat() results in, eg, '2010-10-22' (YYYY-MM-DD)
t['release_date'] = release.release_date.isoformat()
+ if release.release_year is None:
+ t['release_year'] = release.release_date.year
+ if release.release_year is not None:
+ t['release_year'] = release.release_year
container = release.container
container_is_kept = False
diff --git a/python/fatcat_web/templates/creator_view.html b/python/fatcat_web/templates/creator_view.html
index 2ce01fb6..802ca4c9 100644
--- a/python/fatcat_web/templates/creator_view.html
+++ b/python/fatcat_web/templates/creator_view.html
@@ -34,8 +34,9 @@
<p>This creator has contributed to:
<ul>
{% for release in releases %}
- <li>"{{ release.title }}", a {{ release.release_type }} published {{ release.release_date }}
- {% if release.release_status != None %}(status: <code>{{ release.release_status }})</code>{% endif %}.
+ <li>"{{ release.title }}", a {{ release.release_type }}
+ {% if release.release_year != None %}published in {{ release.release_year }}{% endif %}
+ {% if release.release_status != None %}(status: <code>{{ release.release_status or "(unknown)" }})</code>{% endif %}.
<br>Fatcat ID: <a href="/release/{{ release.ident }}"><code>{{ release.ident }}</code></a>
{% endfor %}
</ul>
diff --git a/python/fatcat_web/templates/release_search.html b/python/fatcat_web/templates/release_search.html
index 18bda117..c6a6df60 100644
--- a/python/fatcat_web/templates/release_search.html
+++ b/python/fatcat_web/templates/release_search.html
@@ -31,8 +31,8 @@
{% if paper.doi %}
DOI: <a href="https://doi.org/{{paper.doi }}" style="color: green;">{{ paper.doi }}</a>
- {{ paper.release_type }}
- {% if paper.release_date %}
- - {{ paper.release_date[:4] }}
+ {% if paper.release_year %}
+ - {{ paper.release_year }}
{% endif %}
<br>
{% endif %}
diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html
index 85492eba..fd86b7c9 100644
--- a/python/fatcat_web/templates/release_view.html
+++ b/python/fatcat_web/templates/release_view.html
@@ -22,7 +22,11 @@
<div class="one wide column"></div>
<div class="ten wide column" style="font-size: 16px;">
-{% if release.release_date != None %}<p><b>Date (published):</b> {{ release.release_date }}{% endif %}
+{% if release.release_date != None %}
+ <p><b>Date (published):</b> {{ release.release_date }}
+{% elif release.release_year != None %}
+ <p><b>Date (published):</b> {{ release.release_year }}
+{% endif %}
{% if release.pmid != None %}
<br><b>PubMed:</b> <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{ release.pmid }}">&nbsp;<code>{{ release.pmid }}</code></a>
{% endif %}
diff --git a/python/fatcat_web/templates/work_view.html b/python/fatcat_web/templates/work_view.html
index 87120e63..c767e1ba 100644
--- a/python/fatcat_web/templates/work_view.html
+++ b/python/fatcat_web/templates/work_view.html
@@ -34,7 +34,9 @@ still reference the same underlying "work".
{% if releases != [] %}
<ul>
{% for release in releases %}
- <li>"{{ release.title }}", a {{ release.release_type }} published {{ release.release_date }} as <code>{{ release.release_status }}</code>.
+ <li>"{{ release.title }}", a {{ release.release_type }} published
+ {% if release.release_year != None %}in {{ release.release_year }}{% endif %}
+ as <code>{{ release.release_status or "(unknown)" }}</code>.
<br><a href="/release/{{ release.ident }}"><code>{{ release.ident }}</code></a>
{% endfor %}
</ul>
diff --git a/rust/migrations/2018-05-12-001226_init/up.sql b/rust/migrations/2018-05-12-001226_init/up.sql
index 7754b328..c842295e 100644
--- a/rust/migrations/2018-05-12-001226_init/up.sql
+++ b/rust/migrations/2018-05-12-001226_init/up.sql
@@ -196,6 +196,7 @@ CREATE TABLE release_rev (
release_type TEXT, -- TODO: enum
release_status TEXT, -- TODO: enum
release_date DATE,
+ release_year BIGINT,
doi TEXT,
-- CHECK for length limit for data quality
pmid TEXT CHECK(octet_length(pmid) <= 12),
@@ -430,10 +431,10 @@ INSERT INTO work_edit (ident_id, rev_id, redirect_id, editgroup_id, prev_rev) VA
('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000002', null, '00000000-0000-0000-BBBB-000000000004', null),
('00000000-0000-0000-5555-000000000002', '00000000-0000-0000-5555-FFF000000003', null, '00000000-0000-0000-BBBB-000000000005', '00000000-0000-0000-5555-FFF000000002');
-INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES
- ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001', null, 'example title', null, null, null, null, null, null, null, null, null, null, null, null, null, null),
- ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001', 'bigger example', 'article-journal', null, '2018-01-01', '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'),
- ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', '10.1371/journal.pmed.0020124', null, null, null, null, null, '2', '8', 'e124', 'Public Library of Science', 'en');
+INSERT INTO release_rev (id, work_ident_id, container_ident_id, title, release_type, release_status, release_date, release_year, doi, wikidata_qid, pmid, pmcid, isbn13, core_id, volume, issue, pages, publisher, language) VALUES
+ ('00000000-0000-0000-4444-FFF000000001', '00000000-0000-0000-5555-000000000001', null, 'example title', null, null, null, null, null, null, null, null, null, null, null, null, null, null, null),
+ ('00000000-0000-0000-4444-FFF000000002', '00000000-0000-0000-5555-000000000002', '00000000-0000-0000-1111-000000000001', 'bigger example', 'article-journal', null, '2018-01-01', 2018, '10.123/abc', 'Q55555', '54321', 'PMC555','978-3-16-148410-0', '42022773', '12', 'IV', '5-9', 'bogus publishing group', 'cn'),
+ ('00000000-0000-0000-4444-FFF000000003', '00000000-0000-0000-5555-000000000003', '00000000-0000-0000-1111-000000000003', 'Why Most Published Research Findings Are False', 'article-journal', 'published', '2005-08-30', 2005, '10.1371/journal.pmed.0020124', null, null, null, null, null, '2', '8', 'e124', 'Public Library of Science', 'en');
INSERT INTO release_ident (id, is_live, rev_id, redirect_id) VALUES
('00000000-0000-0000-4444-000000000001', true, '00000000-0000-0000-4444-FFF000000001', null), -- aaaaaaaaaaaaarceaaaaaaaaae
diff --git a/rust/src/api_entity_crud.rs b/rust/src/api_entity_crud.rs
index 792e6f9a..ee2d4ef3 100644
--- a/rust/src/api_entity_crud.rs
+++ b/rust/src/api_entity_crud.rs
@@ -166,7 +166,9 @@ macro_rules! generic_db_create_batch {
) -> Result<Vec<Self::EditRow>> {
if models.iter().any(|m| m.redirect.is_some()) {
return Err(ErrorKind::OtherBadRequest(
- "can't create an entity that redirects from the start".to_string()).into());
+ "can't create an entity that redirects from the start".to_string(),
+ )
+ .into());
}
let rev_ids: Vec<Uuid> = Self::db_insert_revs(conn, models)?;
let ident_ids: Vec<Uuid> = insert_into($ident_table::table)
@@ -658,7 +660,9 @@ impl EntityCrud for ContainerEntity {
if models.iter().any(|m| m.name.is_none()) {
return Err(ErrorKind::OtherBadRequest(
- "name is required for all Container entities".to_string()).into());
+ "name is required for all Container entities".to_string(),
+ )
+ .into());
}
let rev_ids: Vec<Uuid> = insert_into(container_rev::table)
@@ -767,14 +771,16 @@ impl EntityCrud for CreatorEntity {
if models.iter().any(|m| m.display_name.is_none()) {
return Err(ErrorKind::OtherBadRequest(
- "display_name is required for all Creator entities".to_string()).into());
+ "display_name is required for all Creator entities".to_string(),
+ )
+ .into());
}
let rev_ids: Vec<Uuid> = insert_into(creator_rev::table)
.values(
models
.iter()
- .map(|model|CreatorRevNewRow {
+ .map(|model| CreatorRevNewRow {
display_name: model.display_name.clone().unwrap(), // unwrapped checked above
given_name: model.given_name.clone(),
surname: model.surname.clone(),
@@ -996,6 +1002,7 @@ impl EntityCrud for ReleaseEntity {
release_type: None,
release_status: None,
release_date: None,
+ release_year: None,
doi: None,
pmid: None,
pmcid: None,
@@ -1029,7 +1036,7 @@ impl EntityCrud for ReleaseEntity {
fn db_expand(&mut self, conn: &DbConn, expand: ExpandFlags) -> Result<()> {
// Don't expand deleted entities
if self.state == Some("deleted".to_string()) {
- return Ok(())
+ return Ok(());
}
// TODO: should clarify behavior here. Would hit this path, eg, expanding files on a
// release revision (not ident). Should we fail (Bad Request), or silently just not include
@@ -1062,7 +1069,8 @@ impl EntityCrud for ReleaseEntity {
contrib.creator = Some(CreatorEntity::db_get(
conn,
FatCatId::from_str(creator_id)?,
- HideFlags::none())?);
+ HideFlags::none(),
+ )?);
}
}
}
@@ -1073,7 +1081,9 @@ impl EntityCrud for ReleaseEntity {
fn db_create(&self, conn: &DbConn, edit_context: &EditContext) -> Result<Self::EditRow> {
if self.redirect.is_some() {
return Err(ErrorKind::OtherBadRequest(
- "can't create an entity that redirects from the start".to_string()).into());
+ "can't create an entity that redirects from the start".to_string(),
+ )
+ .into());
}
let mut edits = Self::db_create_batch(conn, edit_context, &[self])?;
// probably a more elegant way to destroy the vec and take first element
@@ -1089,7 +1099,9 @@ impl EntityCrud for ReleaseEntity {
// of the release entities passed (at least in the common case)
if models.iter().any(|m| m.redirect.is_some()) {
return Err(ErrorKind::OtherBadRequest(
- "can't create an entity that redirects from the start".to_string()).into());
+ "can't create an entity that redirects from the start".to_string(),
+ )
+ .into());
}
// Generate the set of new work entities to insert (usually one for each release, but some
@@ -1252,6 +1264,7 @@ impl EntityCrud for ReleaseEntity {
release_type: rev_row.release_type,
release_status: rev_row.release_status,
release_date: rev_row.release_date,
+ release_year: rev_row.release_year,
doi: rev_row.doi,
pmid: rev_row.pmid,
pmcid: rev_row.pmcid,
@@ -1310,7 +1323,9 @@ impl EntityCrud for ReleaseEntity {
if models.iter().any(|m| m.title.is_none()) {
return Err(ErrorKind::OtherBadRequest(
- "title is required for all Release entities".to_string()).into());
+ "title is required for all Release entities".to_string(),
+ )
+ .into());
}
let rev_ids: Vec<Uuid> = insert_into(release_rev::table)
@@ -1323,6 +1338,7 @@ impl EntityCrud for ReleaseEntity {
release_type: model.release_type.clone(),
release_status: model.release_status.clone(),
release_date: model.release_date,
+ release_year: model.release_year,
doi: model.doi.clone(),
pmid: model.pmid.clone(),
pmcid: model.pmcid.clone(),
diff --git a/rust/src/api_helpers.rs b/rust/src/api_helpers.rs
index 32750836..b837dfc2 100644
--- a/rust/src/api_helpers.rs
+++ b/rust/src/api_helpers.rs
@@ -22,7 +22,6 @@ pub struct EditContext {
}
impl EditContext {
-
/// This function should always be run within a transaction
pub fn check(&self, conn: &DbConn) -> Result<()> {
let count: i64 = changelog::table
@@ -36,7 +35,6 @@ impl EditContext {
}
}
-
#[derive(Clone, Copy, PartialEq)]
pub struct ExpandFlags {
pub files: bool,
@@ -456,11 +454,21 @@ pub fn check_sha256(raw: &str) -> Result<()> {
#[test]
fn test_check_sha256() {
- assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok());
- assert!(check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err());
- assert!(check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err());
- assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err());
- assert!(check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err());
+ assert!(
+ check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_ok()
+ );
+ assert!(
+ check_sha256("gb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()
+ );
+ assert!(
+ check_sha256("UB1C378F464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e452").is_err()
+ );
+ assert!(
+ check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e45").is_err()
+ );
+ assert!(
+ check_sha256("cb1c378f464d5935ddaa8de28446d82638396c61f042295d7fb85e3cccc9e4522").is_err()
+ );
}
pub fn check_release_type(raw: &str) -> Result<()> {
diff --git a/rust/src/api_server.rs b/rust/src/api_server.rs
index adc6ab11..d06de9c1 100644
--- a/rust/src/api_server.rs
+++ b/rust/src/api_server.rs
@@ -186,7 +186,7 @@ impl Server {
.filter(file_ident::is_live.eq(true))
.filter(file_ident::redirect_id.is_null())
.first(conn)?
- },
+ }
(None, Some(sha1), None) => {
check_sha1(sha1)?;
file_ident::table
@@ -195,7 +195,7 @@ impl Server {
.filter(file_ident::is_live.eq(true))
.filter(file_ident::redirect_id.is_null())
.first(conn)?
- },
+ }
(None, None, Some(sha256)) => {
check_sha256(sha256)?;
file_ident::table
@@ -204,7 +204,7 @@ impl Server {
.filter(file_ident::is_live.eq(true))
.filter(file_ident::redirect_id.is_null())
.first(conn)?
- },
+ }
_ => {
return Err(ErrorKind::MissingOrMultipleExternalId("in lookup".to_string()).into());
}
diff --git a/rust/src/database_models.rs b/rust/src/database_models.rs
index d5daf9a4..617b150b 100644
--- a/rust/src/database_models.rs
+++ b/rust/src/database_models.rs
@@ -247,6 +247,7 @@ pub struct ReleaseRevRow {
pub release_type: Option<String>,
pub release_status: Option<String>,
pub release_date: Option<chrono::NaiveDate>,
+ pub release_year: Option<i64>,
pub doi: Option<String>,
pub pmid: Option<String>,
pub pmcid: Option<String>,
@@ -270,6 +271,7 @@ pub struct ReleaseRevNewRow {
pub release_type: Option<String>,
pub release_status: Option<String>,
pub release_date: Option<chrono::NaiveDate>,
+ pub release_year: Option<i64>,
pub doi: Option<String>,
pub pmid: Option<String>,
pub pmcid: Option<String>,
diff --git a/rust/src/database_schema.rs b/rust/src/database_schema.rs
index 829a21b8..6c1fb929 100644
--- a/rust/src/database_schema.rs
+++ b/rust/src/database_schema.rs
@@ -211,6 +211,7 @@ table! {
release_type -> Nullable<Text>,
release_status -> Nullable<Text>,
release_date -> Nullable<Date>,
+ release_year -> Nullable<Int8>,
doi -> Nullable<Text>,
pmid -> Nullable<Text>,
pmcid -> Nullable<Text>,
diff --git a/rust/tests/test_api_server_http.rs b/rust/tests/test_api_server_http.rs
index 714cfc68..d6cdb6d3 100644
--- a/rust/tests/test_api_server_http.rs
+++ b/rust/tests/test_api_server_http.rs
@@ -586,6 +586,7 @@ fn test_post_release() {
r#"{"title": "secret paper",
"release_type": "article-journal",
"release_date": "2000-01-02",
+ "release_year": 2000,
"doi": "10.1234/abcde.781231231239",
"pmid": "54321",
"pmcid": "PMC12345",
@@ -1230,6 +1231,81 @@ fn test_release_dates() {
None,
);
+ // Ok
+ check_http_response(
+ request::post(
+ "http://localhost:9411/v0/release",
+ headers.clone(),
+ r#"{"title": "secret minimal paper",
+ "release_type": "article-journal",
+ "release_year": 2000
+ }"#,
+ &router,
+ ),
+ status::Created,
+ None,
+ );
+
+ // Ok; ISO 8601
+ check_http_response(
+ request::post(
+ "http://localhost:9411/v0/release",
+ headers.clone(),
+ r#"{"title": "secret minimal paper",
+ "release_type": "article-journal",
+ "release_year": -100
+ }"#,
+ &router,
+ ),
+ status::Created,
+ None,
+ );
+ check_http_response(
+ request::post(
+ "http://localhost:9411/v0/release",
+ headers.clone(),
+ r#"{"title": "secret minimal paper",
+ "release_type": "article-journal",
+ "release_year": 0
+ }"#,
+ &router,
+ ),
+ status::Created,
+ None,
+ );
+
+ // Ok
+ check_http_response(
+ request::post(
+ "http://localhost:9411/v0/release",
+ headers.clone(),
+ r#"{"title": "secret minimal paper",
+ "release_type": "article-journal",
+ "release_date": "2000-01-02",
+ "release_year": 2000
+ }"#,
+ &router,
+ ),
+ status::Created,
+ None,
+ );
+
+ // Ok for now, but may be excluded later
+ check_http_response(
+ request::post(
+ "http://localhost:9411/v0/release",
+ headers.clone(),
+ r#"{"title": "secret minimal paper",
+ "release_type": "article-journal",
+ "release_date": "2000-01-02",
+ "release_year": 1999
+ }"#,
+ &router,
+ ),
+ status::Created,
+ None,
+ );
+
// Bad: year/month only
check_http_response(
request::post(