commit old work

author: Bryan Newbold <bnewbold@robocracy.org> 2018-04-11 15:03:18 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-04-11 15:03:18 -0700
commit: 2c769d488334bc6aab9f65a7c66e18442949d482 (patch)
tree: f469db7c98da3cce46c7cb2756d897834a168a6c
parent: f8920f46b46282670229bdb7cf12052d167c26e8 (diff)
download: fatcat-2c769d488334bc6aab9f65a7c66e18442949d482.tar.gz
fatcat-2c769d488334bc6aab9f65a7c66e18442949d482.zip
2 files changed, 45 insertions, 7 deletions
diff --git a/fatcat/models.py b/fatcat/models.py
index dd84070d..2f0bfd72 100644
--- a/fatcat/models.py
+++ b/fatcat/models.py
@@ -16,16 +16,34 @@ release_contrib = db.Table("release_contrib",
     db.Column("stub", db.String, nullable=True))
 
 class WorkId(db.Model):
+    """
+    If revision_id is null, this was deleted.
+    If redirect_id is not null, this has been merged with the given id. In this
+        case revision_id is a "cached" copy of the redirect's revision_id, as
+        an optimization. If the merged work is "deleted", revision_id can be
+        null and redirect_id not-null.
+    """
     __tablename__ = 'work_id'
-    id = db.Column(db.Integer, primary_key=True)
-    revision_id = db.Column(db.ForeignKey('work_revision.id'))
+    id = db.Column(db.Integer, primary_key=True, nullable=False)
+    revision_id = db.Column(db.ForeignKey('work_revision.id'), nullable=True)
+    redirect_id = db.Column(db.ForeignKey('work_id.id'), nullable=True)
+
+class WorkLog(db.Model):
+    __tablename__ = 'work_log'
+    # ID is a monotonic int here; important for ordering!
+    id = db.Column(db.Integer, primary_key=True, nullable=False)
+    work_id = db.Column(db.ForeignKey('work_id.id'), nullable=False)
+    old_revision_id = db.Column(db.ForeignKey('work_revision.id'), nullable=True)
+    old_redirect_id = db.Column(db.ForeignKey('work_id.id'), nullable=True)
+    new_revision_id = db.Column(db.ForeignKey('work_revision.id'), nullable=True)
+    new_redirect_id = db.Column(db.ForeignKey('work_id.id'), nullable=True)
+    # TODO: is this right?
+    edit_id = db.Column(db.ForeignKey('edit.id'))
 
 class WorkRevision(db.Model):
     __tablename__ = 'work_revision'
     id = db.Column(db.Integer, primary_key=True)
     previous = db.Column(db.ForeignKey('work_revision.id'), nullable=True)
-    state = db.Column(db.String)
-    redirect_id = db.Column(db.ForeignKey('work_id.id'), nullable=True)
     edit_id = db.Column(db.ForeignKey('edit.id'))
     extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)
     #work_ids = db.relationship("WorkId", backref="revision", lazy=True)
@@ -130,9 +148,14 @@ class ReleaseFil(db.Model):
 class Edit(db.Model):
     __tablename__ = 'edit'
     id = db.Column(db.Integer, primary_key=True, autoincrement=True)
-    edit_group = db.Column(db.ForeignKey('edit_group.id'))
-    editor = db.Column(db.ForeignKey('editor.id'))
-    description = db.Column(db.String)
+    edit_group = db.Column(db.ForeignKey('edit_group.id'), nullable=True)
+    editor = db.Column(db.ForeignKey('editor.id'), nullable=False)
+    comment = db.Column(db.String, nullable=True)
+    extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)
+    # WARNING: polymorphic. Represents the id that should end up pointing to
+    # this revision.
+    # TODO: this doesn't work
+    entity_id = db.Column(db.Integer, nullable=True)
 
 class EditGroup(db.Model):
     __tablename__ = 'edit_group'
diff --git a/next_thoughts.txt b/next_thoughts.txt
index 0e89249a..8c7d12fc 100644
--- a/next_thoughts.txt
+++ b/next_thoughts.txt
@@ -1,3 +1,4 @@
+
 Should probably just UUID all the (public) ids.
 
 Instead of having a separate id pointer table, could have an extra "mutable"
@@ -17,3 +18,17 @@ Trade-off: easy querying for common case (wanting "active" rows) vs. robust
 handling of redirects (likely to be pretty common). Also, having UUID handling
 across more than one table.
 
+## Scaling database
+
+Two scaling issues: size of database due to edits (likely billions of rows) and 
+desire to do complex queries/reports ("analytics"). The later is probably not a
+concern, and could be handled by dumping and working on a cluster (or secondary
+views, etc). So just a distraction? Simpler to have all rolled up.
+
+Cockroach is postgres-like; might be able to use that for HA and scaling?
+Bottlenecks are probably complex joins (mitigated by "interleave"?) and bulk
+import performance (one-time?).
+
+Using elastic for most (eg, non-logged-in) views could keep things fast.
+
+Cockroach seems more resourced/polished than TiDB?
author	Bryan Newbold <bnewbold@robocracy.org>	2018-04-11 15:03:18 -0700
committer	Bryan Newbold <bnewbold@robocracy.org>	2018-04-11 15:03:18 -0700
commit	2c769d488334bc6aab9f65a7c66e18442949d482 (patch)
tree	f469db7c98da3cce46c7cb2756d897834a168a6c
parent	f8920f46b46282670229bdb7cf12052d167c26e8 (diff)
download	fatcat-2c769d488334bc6aab9f65a7c66e18442949d482.tar.gz fatcat-2c769d488334bc6aab9f65a7c66e18442949d482.zip