diff options
Diffstat (limited to 'python/fatcat_web/entity_helpers.py')
-rw-r--r-- | python/fatcat_web/entity_helpers.py | 143 |
1 files changed, 123 insertions, 20 deletions
diff --git a/python/fatcat_web/entity_helpers.py b/python/fatcat_web/entity_helpers.py index 86543ee3..2e3b83c5 100644 --- a/python/fatcat_web/entity_helpers.py +++ b/python/fatcat_web/entity_helpers.py @@ -1,4 +1,5 @@ -from typing import Any, Tuple +import difflib +from typing import Any, Dict, List, Tuple from fatcat_openapi_client import ( ContainerEntity, @@ -17,6 +18,7 @@ from flask import abort from fatcat_tools.transforms import ( container_to_elasticsearch, + entity_to_toml, file_to_elasticsearch, release_to_elasticsearch, ) @@ -139,24 +141,38 @@ def enrich_work_entity(entity: WorkEntity) -> WorkEntity: return entity -def generic_get_entity(entity_type: str, ident: str) -> Any: +def generic_get_entity(entity_type: str, ident: str, enrich: bool = True) -> Any: try: - if entity_type == "container": + if entity_type == "container" and enrich: return enrich_container_entity(api.get_container(ident)) - elif entity_type == "creator": + elif entity_type == "container": + return api.get_container(ident) + elif entity_type == "creator" and enrich: return enrich_creator_entity(api.get_creator(ident)) - elif entity_type == "file": + elif entity_type == "creator": + return api.get_creator(ident) + elif entity_type == "file" and enrich: return enrich_file_entity(api.get_file(ident, expand="releases")) - elif entity_type == "fileset": + elif entity_type == "file": + return api.get_file(ident, expand="releases") + elif entity_type == "fileset" and enrich: return enrich_fileset_entity(api.get_fileset(ident, expand="releases")) - elif entity_type == "webcapture": + elif entity_type == "fileset": + return api.get_fileset(ident) + elif entity_type == "webcapture" and enrich: return enrich_webcapture_entity(api.get_webcapture(ident, expand="releases")) - elif entity_type == "release": + elif entity_type == "webcapture": + return api.get_webcapture(ident) + elif entity_type == "release" and enrich: return enrich_release_entity( api.get_release(ident, 
expand="container,creators,files,filesets,webcaptures") ) - elif entity_type == "work": + elif entity_type == "release": + return api.get_release(ident) + elif entity_type == "work" and enrich: return enrich_work_entity(api.get_work(ident)) + elif entity_type == "work": + return api.get_work(ident) else: raise NotImplementedError except ApiException as ae: @@ -165,30 +181,44 @@ def generic_get_entity(entity_type: str, ident: str) -> Any: abort(400) -def generic_get_entity_revision(entity_type: str, revision_id: str) -> Any: +def generic_get_entity_revision(entity_type: str, revision_id: str, enrich: bool = True) -> Any: try: - if entity_type == "container": + if entity_type == "container" and enrich: return enrich_container_entity(api.get_container_revision(revision_id)) - elif entity_type == "creator": + elif entity_type == "container": + return api.get_container_revision(revision_id) + elif entity_type == "creator" and enrich: return enrich_creator_entity(api.get_creator_revision(revision_id)) - elif entity_type == "file": + elif entity_type == "creator": + return api.get_creator_revision(revision_id) + elif entity_type == "file" and enrich: return enrich_file_entity(api.get_file_revision(revision_id, expand="releases")) - elif entity_type == "fileset": + elif entity_type == "file": + return api.get_file_revision(revision_id) + elif entity_type == "fileset" and enrich: return enrich_fileset_entity( api.get_fileset_revision(revision_id, expand="releases") ) - elif entity_type == "webcapture": + elif entity_type == "fileset": + return api.get_fileset_revision(revision_id) + elif entity_type == "webcapture" and enrich: return enrich_webcapture_entity( api.get_webcapture_revision(revision_id, expand="releases") ) - elif entity_type == "release": + elif entity_type == "webcapture": + return api.get_webcapture_revision(revision_id) + elif entity_type == "release" and enrich: return enrich_release_entity( api.get_release_revision(revision_id, expand="container") ) - 
elif entity_type == "work": + elif entity_type == "release": + return api.get_release_revision(revision_id) + elif entity_type == "work" and enrich: return enrich_work_entity(api.get_work_revision(revision_id)) + elif entity_type == "work": + return api.get_work_revision(revision_id) else: - raise NotImplementedError + raise NotImplementedError(f"entity_type: {entity_type}") except ApiException as ae: abort(ae.status) except ApiValueError: @@ -217,7 +247,10 @@ def generic_deleted_entity(entity_type: str, ident: str) -> Any: def generic_get_editgroup_entity( - editgroup: Editgroup, entity_type: str, ident: str + editgroup: Editgroup, + entity_type: str, + ident: str, + enrich: bool = True, ) -> Tuple[Any, EntityEdit]: if entity_type == "container": edits = editgroup.edits.containers @@ -250,7 +283,7 @@ def generic_get_editgroup_entity( return generic_deleted_entity(entity_type, ident), edit try: - entity = generic_get_entity_revision(entity_type, revision_id) + entity = generic_get_entity_revision(entity_type, revision_id, enrich=enrich) except ApiException as ae: abort(ae.status) except ApiValueError: @@ -258,3 +291,73 @@ def generic_get_editgroup_entity( entity.ident = ident return entity, edit + + +def _entity_edit_diff(entity_type: str, entity_edit: EntityEdit) -> List[str]: + """ + Helper to generate diff lines for a single entity edit. 
+ + Schema of entity_edit (as a reminder): + + entity_edit + ident + revision + prev_revision + redirect_ident + """ + pop_fields = ["ident", "revision", "state"] + new_rev = generic_get_entity_revision(entity_type, entity_edit.revision, enrich=False) + new_toml = entity_to_toml(new_rev, pop_fields=pop_fields).strip().split("\n") + if len(new_toml) == 1 and not new_toml[0].strip(): + new_toml = [] + if entity_edit.prev_revision: + old_rev = generic_get_entity_revision( + entity_type, entity_edit.prev_revision, enrich=False + ) + old_toml = entity_to_toml(old_rev, pop_fields=pop_fields).strip().split("\n") + fromdesc = f"/{entity_type}/rev/{entity_edit.prev_revision}.toml" + else: + old_toml = [] + fromdesc = "(created)" + + diff_lines = list( + difflib.unified_diff( + old_toml, + new_toml, + fromfile=fromdesc, + tofile=f"/{entity_type}/rev/{entity_edit.revision}.toml", + ) + ) + return diff_lines + + +def editgroup_get_diffs(editgroup: Editgroup) -> Dict[str, Any]: + """ + Fetches before/after entity revisions, and computes "diffs" of TOML representations. + + Returns a dict with entity type (singular, like "file"), then within + that a dict with entity ident (without prefix) containing a list of + strings, one per line of the "unified diff" format. If there is no diff for + an edited entity (eg, it was deleted or redirected), instead `None` is returned for + that entity. + """ + diffs: Dict[str, Any] = {} + + for entity_type in [ + "container", + "creator", + "release", + "work", + "file", + "fileset", + "webcapture", + ]: + edits = getattr(editgroup.edits, entity_type + "s") or [] + diffs[entity_type] = {} + for ed in edits: + # only for creation and update + if ed.revision and not ed.redirect_ident: + diffs[entity_type][ed.ident] = _entity_edit_diff(entity_type, ed) + else: + diffs[entity_type][ed.ident] = None + return diffs |