diff options
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | TODO | 29 | ||||
-rw-r--r-- | python/README_import.md | 4 | ||||
-rw-r--r-- | python/tests/api_annotations.py | 39 | ||||
-rw-r--r-- | python/tests/api_editgroups.py | 116 | ||||
-rw-r--r-- | python/tests/fixtures.py | 30 | ||||
-rw-r--r-- | rust/src/bin/fatcatd.rs | 1 | ||||
-rw-r--r-- | rust/src/endpoints.rs | 10 |
8 files changed, 184 insertions, 51 deletions
@@ -48,7 +48,7 @@ schema, lives under `./python_client/`. - [x] JSON(B) "extra" metadata fields - [x] full rev1 schema for all entities - [x] file sets and web captures - - [ ] editgroup review: comments? actions? + - [x] editgroup review: annotations - HTTP API Server - [x] base32 encoding of UUID identifiers - [x] inverse many-to-many helpers (files-by-release, release-by-creator) @@ -62,6 +62,6 @@ schema, lives under `./python_client/`. - [x] Basic logging - [x] Swagger-UI - [x] Bulk metadata exports - - [ ] Sentry (error reporting) - - [ ] Metrics + - [x] Sentry (error reporting) + - [x] Metrics @@ -1,32 +1,24 @@ ## In Progress -- basic python tests for editgroup, annotation, submission changes -- python tests for new autoaccept behavior -- python tests for citation table storage efficiency changes - => should there be a distinction between empty list and no references? - yes, eg if expanded or not hidden - => postgres manual checks that this is working - => also benchmark (both speed and efficiency) +- check that any needed/new indices are in place + => seems to at least superficially work +- benchmark citation efficiency (in QA) + +- all query params need to be strings, and parse in rust :( + since=(datetime.datetime.utcnow() + datetime.timedelta(seconds=1)).isoformat()+"Z" +- doc: python client API needs to have booleans set as, eg, 'true'/'false' (str) (!?!?) + "note that non-required or collection query parameters will ignore garbage values, rather than causing a 400 response" ## Next Up - "don't clobber" mode/flag for crossref import (and others?) -- update_file requires 'id'. should it be 'ident'? - => something different about file vs. release -- guide updates for auth -- refactor webface views to use shared entity_view.html template -- handle 'wip' status entities in web UI - elastic inserter should handle deletions and redirects; if state isn't active, delete the document => don't delete, just store state. 
but need to "blank" redirects and WIP so they don't show up in results => refactor inserter to be a class (eg, for command line use) => end-to-end test of this behavior? -- date handling is really pretty bad for releases; mangling those Jan1/Dec31 - => elastic schema should have a year field (integer) -- document: elastic query date syntax is like: date:[2018-10-01 TO 2018-12-31] -- elastic transform should only include authors, not editors (?) - webcapture timestamp schema cleanup (both CDX and base) => dt.to_rfc3339_opts(SecondsFormat::Secs, true) => but this is mostly buried in serialization code? @@ -43,6 +35,9 @@ - handle very large author/reference lists (instead of dropping) => https://api.crossref.org/v1/works/http://dx.doi.org/10.1007/978-3-319-46095-6_7 => 7000+ authors (!) +- guide updates for auth +- refactor webface views to use shared entity_view.html template +- handle 'wip' status entities in web UI ## Bugs (or at least need tests) @@ -151,6 +146,7 @@ new importers: ## Schema / Entity Fields +- elastic transform should only include authors, not editors (?) - arxiv_id field (keep flip-flopping) - original_title field (internationalization, "original language") - `doi` field for containers (at least for "journal" type; maybe for "series" @@ -162,6 +158,7 @@ new importers: ## Other / Backburner +- document: elastic query date syntax is like: date:[2018-10-01 TO 2018-12-31] - fileset/webcapture webface anything - display abstracts better. 
no hashes or metadata; prefer plain or HTML, convert JATS if necessary diff --git a/python/README_import.md b/python/README_import.md index cc9a94e1..9dda725d 100644 --- a/python/README_import.md +++ b/python/README_import.md @@ -26,11 +26,13 @@ the others: wget https://archive.org/download/ia_papers_manifest_2018-01-25/index/idents_files_urls.sqlite.gz wget https://archive.org/download/ia_journal_metadata_explore_2018-04-05/journal_extra_metadata.csv wget https://archive.org/download/issn_issnl_mappings/20180216.ISSN-to-ISSN-L.txt - wget https://archive.org/download/orcid-dump-2017/public_profiles_API-2.0_2017_10_json.tar.gz + wget https://archive.org/download/orcid-dump-2017/public_profiles_1_2_json.all.json.gz wget https://archive.org/download/ia_journal_pid_map_munge_20180908/release_ids.ia_munge_20180908.sqlite3.gz wget https://archive.org/download/ia_test_paper_matches/2018-08-27-2352.17-matchcrossref.insertable.json.gz wget https://archive.org/download/ia_papers_manifest_2018-01-25_matched/ia_papers_manifest_2018-01-25.matched.json.gz + gunzip public_profiles_1_2_json.all.json.gz + ## ISSN From CSV file: diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py new file mode 100644 index 00000000..0d3c5046 --- /dev/null +++ b/python/tests/api_annotations.py @@ -0,0 +1,39 @@ + +import json +import pytest +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_annotations(api): + + eg = quick_eg(api) + + # ensure no annotations on this object + a = api.get_editgroup_annotations(eg.editgroup_id) + assert a == [] + + # create an annotation! 
+ api.create_editgroup_annotation( + eg.editgroup_id, + EditgroupAnnotation( + comment_markdown="some *annotation*", + extra=dict(thing="thang"))) + + # check that we can fetch it all sorts of ways + a = api.get_editgroup_annotations(eg.editgroup_id) + assert len(a) == 1 + assert a[0].extra['thing'] == "thang" + + # the editor persists, so this is a hack to find a "recent" one + a2 = api.get_editor_annotations(eg.editor_id, limit=100) + found = None + for thing in a2: + if thing.annotation_id == a[0].annotation_id: + found = thing + break + assert thing + assert thing.extra['thing'] == "thang" diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py new file mode 100644 index 00000000..af55cfeb --- /dev/null +++ b/python/tests/api_editgroups.py @@ -0,0 +1,116 @@ + +import json +import pytest +import datetime +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_editgroup_submit(api): + # 1. check that edit group can be submitted/unsubmitted, and shows up in reviewable appropriately + # 2. 
accepted edits don't show up as reviewable and can't be submitted + + c1 = CreatorEntity(display_name="test updates") + eg = quick_eg(api) + c1 = api.get_creator(api.create_creator(c1, editgroup_id=eg.editgroup_id).ident) + + eg2 = api.get_editgroup(eg.editgroup_id) + assert not eg2.submitted + assert not eg2.changelog_index + + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id not in [v.editgroup_id for v in reviewable] + wip = api.get_editor_editgroups(eg.editor_id, limit=100) + assert eg.editgroup_id in [v.editgroup_id for v in wip] + + api.update_editgroup(eg.editgroup_id, eg2, submit='true') + eg3 = api.get_editgroup(eg.editgroup_id) + assert eg3.submitted + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id in [v.editgroup_id for v in reviewable] + + api.update_editgroup(eg.editgroup_id, eg2, submit='false') + eg3 = api.get_editgroup(eg.editgroup_id) + assert not eg3.submitted + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id not in [v.editgroup_id for v in reviewable] + + # put back in reviewable + api.update_editgroup(eg.editgroup_id, eg2, submit='true') + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id in [v.editgroup_id for v in reviewable] + + # shouldn't be reviewable if accepted + api.accept_editgroup(eg.editgroup_id) + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id not in [v.editgroup_id for v in reviewable] + eg3 = api.get_editgroup(eg.editgroup_id) + #print(eg3) + assert eg3.submitted + assert eg3.changelog_index + + with pytest.raises(fatcat_client.rest.ApiException): + api.update_editgroup(eg.editgroup_id, eg3, submit='true') + with pytest.raises(fatcat_client.rest.ApiException): + eg3.description = "something" + api.update_editgroup(eg.editgroup_id, eg3) + + +def test_editgroup_ordering(api): + + eg1 = quick_eg(api) + eg2 = quick_eg(api) + api.update_editgroup( + eg1.editgroup_id, + 
Editgroup(editgroup_id=eg1.editgroup_id, description="FAIL"), + submit='true') + api.update_editgroup( + eg2.editgroup_id, + Editgroup(editgroup_id=eg2.editgroup_id, description="FAIL"), + submit='true') + + r1 = api.get_editgroups_reviewable() + #print(r1) + assert not r1[0].description + assert not r1[1].description + assert r1[0].submitted >= r1[1].submitted + + # should be no editgroups "in the future" (since now + 1sec) + r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() + datetime.timedelta(seconds=1)).isoformat()+"Z") + assert not r1 + + r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() - datetime.timedelta(seconds=5)).isoformat()+"Z") + assert r1[0].submitted <= r1[1].submitted + + +def test_editgroup_autoaccept(api): + # autoaccept changes: editgroups required when, in what combination + + eg = quick_eg(api) + c1 = CreatorEntity(display_name="test autoaccept") + c2 = CreatorEntity(display_name="test another autoaccept") + + with pytest.raises(fatcat_client.rest.ApiException): + edits = api.create_creator_batch([c1, c2]) + + with pytest.raises(fatcat_client.rest.ApiException): + edits = api.create_creator_batch([c1, c2], editgroup_id=eg.editgroup_id, autoaccept='true') + + edits1 = api.create_creator_batch([c1, c2], editgroup_id=eg.editgroup_id) + edits2 = api.create_creator_batch([c1, c2], autoaccept='true') + + assert edits1[0].editgroup_id == eg.editgroup_id + assert edits1[0].editgroup_id != edits2[1].editgroup_id + eg1 = api.get_editgroup(edits1[0].editgroup_id) + eg2 = api.get_editgroup(edits2[0].editgroup_id) + + assert not eg1.changelog_index + assert eg2.changelog_index + #print(edits1) + #print(eg1.edits.creators) + assert eg1.edits.creators[0].ident == edits1[0].ident + assert eg2.edits.creators[0].ident == edits2[0].ident + diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index 6a880c48..3cc275b3 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -28,6 +28,7 @@ def api(): 
conf.api_key["Authorization"] = os.getenv("FATCAT_API_AUTH_TOKEN") conf.api_key_prefix["Authorization"] = "Bearer" api_client = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) + api_client.editor_id = "aaaaaaaaaaaabkvkaaaaaaaaae" return api_client def test_get_changelog_entry(api): @@ -38,33 +39,6 @@ def test_get_changelog_entry(api): ## Helpers ################################################################## def quick_eg(api_inst): - eg = api_inst.create_editgroup( - fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) + eg = api_inst.create_editgroup(fatcat_client.Editgroup()) return eg -# TODO: what are these even here for? -def check_entity_fields(e): - for key in ('rev', 'is_live', 'redirect_id'): - assert key in e - for key in ('id',): - assert e[key] is not None - -def check_release(e): - for key in ('work', 'release_type'): - assert key in e - for key in ('title', ): - assert e[key] is not None - for key in ('refs', 'creators'): - assert type(e[key]) == list - -def check_creator(e): - for key in ('name',): - assert e[key] is not None - -def check_container(e): - for key in ('name',): - assert e[key] is not None - -def check_file(e): - for key in ('size', 'sha1'): - assert e[key] is not None diff --git a/rust/src/bin/fatcatd.rs b/rust/src/bin/fatcatd.rs index 75a6f000..ccce6725 100644 --- a/rust/src/bin/fatcatd.rs +++ b/rust/src/bin/fatcatd.rs @@ -88,7 +88,6 @@ fn main() -> Result<()> { server.metrics.incr("restart").unwrap(); } }; - info!(logger, "{:#?}", server.metrics); info!( logger, diff --git a/rust/src/endpoints.rs b/rust/src/endpoints.rs index f7e93448..670c7fd9 100644 --- a/rust/src/endpoints.rs +++ b/rust/src/endpoints.rs @@ -143,13 +143,19 @@ macro_rules! 
wrap_entity_handlers { let conn = self.db_pool.get().expect("db_pool error"); let ret = match conn.transaction(|| { let auth_context = self.auth_confectionary.require_auth(&conn, &context.auth_data, Some(stringify!($post_batch_fn)))?; - auth_context.require_role(FatcatRole::Editor)?; + let autoaccept = autoaccept.unwrap_or(false); + if autoaccept { + auth_context.require_role(FatcatRole::Admin)?; + } else { + auth_context.require_role(FatcatRole::Editor)?; + }; let editgroup_id = if let Some(s) = editgroup_id { + // make_edit_context() checks for "both editgroup_id and autoaccept" error case let eg_id = FatcatId::from_str(&s)?; auth_context.require_editgroup(&conn, eg_id)?; Some(eg_id) } else { None }; - self.$post_batch_handler(&conn, entity_list, autoaccept.unwrap_or(false), auth_context.editor_id, editgroup_id) + self.$post_batch_handler(&conn, entity_list, autoaccept, auth_context.editor_id, editgroup_id) }).map_err(|e| FatcatError::from(e)) { Ok(edits) => { self.metrics.count("entities.created", edits.len() as i64).ok(); |