summaryrefslogtreecommitdiffstats
path: root/python/fatcat_web/entity_helpers.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_web/entity_helpers.py')
-rw-r--r--python/fatcat_web/entity_helpers.py143
1 files changed, 123 insertions, 20 deletions
diff --git a/python/fatcat_web/entity_helpers.py b/python/fatcat_web/entity_helpers.py
index 86543ee3..2e3b83c5 100644
--- a/python/fatcat_web/entity_helpers.py
+++ b/python/fatcat_web/entity_helpers.py
@@ -1,4 +1,5 @@
-from typing import Any, Tuple
+import difflib
+from typing import Any, Dict, List, Tuple
from fatcat_openapi_client import (
ContainerEntity,
@@ -17,6 +18,7 @@ from flask import abort
from fatcat_tools.transforms import (
container_to_elasticsearch,
+ entity_to_toml,
file_to_elasticsearch,
release_to_elasticsearch,
)
@@ -139,24 +141,38 @@ def enrich_work_entity(entity: WorkEntity) -> WorkEntity:
return entity
-def generic_get_entity(entity_type: str, ident: str) -> Any:
+def generic_get_entity(entity_type: str, ident: str, enrich: bool = True) -> Any:
try:
- if entity_type == "container":
+ if entity_type == "container" and enrich:
return enrich_container_entity(api.get_container(ident))
- elif entity_type == "creator":
+ elif entity_type == "container":
+ return api.get_container(ident)
+ elif entity_type == "creator" and enrich:
return enrich_creator_entity(api.get_creator(ident))
- elif entity_type == "file":
+ elif entity_type == "creator":
+ return api.get_creator(ident)
+ elif entity_type == "file" and enrich:
return enrich_file_entity(api.get_file(ident, expand="releases"))
- elif entity_type == "fileset":
+ elif entity_type == "file":
+ return api.get_file(ident, expand="releases")
+ elif entity_type == "fileset" and enrich:
return enrich_fileset_entity(api.get_fileset(ident, expand="releases"))
- elif entity_type == "webcapture":
+ elif entity_type == "fileset":
+ return api.get_fileset(ident)
+ elif entity_type == "webcapture" and enrich:
return enrich_webcapture_entity(api.get_webcapture(ident, expand="releases"))
- elif entity_type == "release":
+ elif entity_type == "webcapture":
+ return api.get_webcapture(ident)
+ elif entity_type == "release" and enrich:
return enrich_release_entity(
api.get_release(ident, expand="container,creators,files,filesets,webcaptures")
)
- elif entity_type == "work":
+ elif entity_type == "release":
+ return api.get_release(ident)
+ elif entity_type == "work" and enrich:
return enrich_work_entity(api.get_work(ident))
+ elif entity_type == "work":
+ return api.get_work(ident)
else:
raise NotImplementedError
except ApiException as ae:
@@ -165,30 +181,44 @@ def generic_get_entity(entity_type: str, ident: str) -> Any:
abort(400)
-def generic_get_entity_revision(entity_type: str, revision_id: str) -> Any:
+def generic_get_entity_revision(entity_type: str, revision_id: str, enrich: bool = True) -> Any:
try:
- if entity_type == "container":
+ if entity_type == "container" and enrich:
return enrich_container_entity(api.get_container_revision(revision_id))
- elif entity_type == "creator":
+ elif entity_type == "container":
+ return api.get_container_revision(revision_id)
+ elif entity_type == "creator" and enrich:
return enrich_creator_entity(api.get_creator_revision(revision_id))
- elif entity_type == "file":
+ elif entity_type == "creator":
+ return api.get_creator_revision(revision_id)
+ elif entity_type == "file" and enrich:
return enrich_file_entity(api.get_file_revision(revision_id, expand="releases"))
- elif entity_type == "fileset":
+ elif entity_type == "file":
+ return api.get_file_revision(revision_id)
+ elif entity_type == "fileset" and enrich:
return enrich_fileset_entity(
api.get_fileset_revision(revision_id, expand="releases")
)
- elif entity_type == "webcapture":
+ elif entity_type == "fileset":
+ return api.get_fileset_revision(revision_id)
+ elif entity_type == "webcapture" and enrich:
return enrich_webcapture_entity(
api.get_webcapture_revision(revision_id, expand="releases")
)
- elif entity_type == "release":
+ elif entity_type == "webcapture":
+ return api.get_webcapture_revision(revision_id)
+ elif entity_type == "release" and enrich:
return enrich_release_entity(
api.get_release_revision(revision_id, expand="container")
)
- elif entity_type == "work":
+ elif entity_type == "release":
+ return api.get_release_revision(revision_id)
+ elif entity_type == "work" and enrich:
return enrich_work_entity(api.get_work_revision(revision_id))
+ elif entity_type == "work":
+ return api.get_work_revision(revision_id)
else:
- raise NotImplementedError
+ raise NotImplementedError(f"entity_type: {entity_type}")
except ApiException as ae:
abort(ae.status)
except ApiValueError:
@@ -217,7 +247,10 @@ def generic_deleted_entity(entity_type: str, ident: str) -> Any:
def generic_get_editgroup_entity(
- editgroup: Editgroup, entity_type: str, ident: str
+ editgroup: Editgroup,
+ entity_type: str,
+ ident: str,
+ enrich: bool = True,
) -> Tuple[Any, EntityEdit]:
if entity_type == "container":
edits = editgroup.edits.containers
@@ -250,7 +283,7 @@ def generic_get_editgroup_entity(
return generic_deleted_entity(entity_type, ident), edit
try:
- entity = generic_get_entity_revision(entity_type, revision_id)
+ entity = generic_get_entity_revision(entity_type, revision_id, enrich=enrich)
except ApiException as ae:
abort(ae.status)
except ApiValueError:
@@ -258,3 +291,73 @@ def generic_get_editgroup_entity(
entity.ident = ident
return entity, edit
+
+
+def _entity_edit_diff(entity_type: str, entity_edit: EntityEdit) -> List[str]:
+ """
+ Helper to generate diff lines for a single entity edit.
+
+ Schema of entity_edit (as a reminder):
+
+ entity_edit
+ ident
+ revision
+ prev_revision
+ redirect_ident
+ """
+ pop_fields = ["ident", "revision", "state"]
+ new_rev = generic_get_entity_revision(entity_type, entity_edit.revision, enrich=False)
+ new_toml = entity_to_toml(new_rev, pop_fields=pop_fields).strip().split("\n")
+ if len(new_toml) == 1 and not new_toml[0].strip():
+ new_toml = []
+ if entity_edit.prev_revision:
+ old_rev = generic_get_entity_revision(
+ entity_type, entity_edit.prev_revision, enrich=False
+ )
+ old_toml = entity_to_toml(old_rev, pop_fields=pop_fields).strip().split("\n")
+ fromdesc = f"/{entity_type}/rev/{entity_edit.prev_revision}.toml"
+ else:
+ old_toml = []
+ fromdesc = "(created)"
+
+ diff_lines = list(
+ difflib.unified_diff(
+ old_toml,
+ new_toml,
+ fromfile=fromdesc,
+ tofile=f"/{entity_type}/rev/{entity_edit.revision}.toml",
+ )
+ )
+ return diff_lines
+
+
+def editgroup_get_diffs(editgroup: Editgroup) -> Dict[str, Any]:
+ """
+ Fetches before/after entity revisions, and computes "diffs" of TOML representations.
+
+ Returns a dict keyed by entity type (singular, like "file"), then within
+ that a dict with entity ident (without prefix) containing a list of
+ strings, one per line of the "unified diff" format. If there is no diff for
+ an edited entity (eg, it was deleted or redirected), instead `None` is returned for
+ that entity.
+ """
+ diffs: Dict[str, Any] = {}
+
+ for entity_type in [
+ "container",
+ "creator",
+ "release",
+ "work",
+ "file",
+ "fileset",
+ "webcapture",
+ ]:
+ edits = getattr(editgroup.edits, entity_type + "s") or []
+ diffs[entity_type] = {}
+ for ed in edits:
+ # only for creation and update
+ if ed.revision and not ed.redirect_ident:
+ diffs[entity_type][ed.ident] = _entity_edit_diff(entity_type, ed)
+ else:
+ diffs[entity_type][ed.ident] = None
+ return diffs