diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2018-04-13 16:16:36 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-04-13 16:16:36 -0700 | 
| commit | bd6f8f46e2ac52352b8077f1de8c382a303ce45c (patch) | |
| tree | 4fcf0c812c3fd0d8174d2c73e9fd62c7171a4740 | |
| parent | 1b4017c929b92c313f55606b5ca1339a170c7fdb (diff) | |
| download | fatcat-bd6f8f46e2ac52352b8077f1de8c382a303ce45c.tar.gz fatcat-bd6f8f46e2ac52352b8077f1de8c382a303ce45c.zip | |
large refactor of model file
| -rw-r--r-- | fatcat/models.py | 220 | ||||
| -rw-r--r-- | fatcat/sql.py | 34 | 
2 files changed, 145 insertions, 109 deletions
| diff --git a/fatcat/models.py b/fatcat/models.py index f1fd31e3..975a5bb7 100644 --- a/fatcat/models.py +++ b/fatcat/models.py @@ -8,117 +8,101 @@ states for identifiers:  - deleted: live, but doesn't point to a rev  possible refactors: -- '_rev' instead of '_revision' +- '_rev' instead of '_rev' +- use mixins for entities  """  from fatcat import db -# TODO: EntityMixin, EntityIdMixin + +### Inter-Entity Relationships ###############################################  class WorkContrib(db.Model):      __tablename__ = "work_contrib" -    work_rev= db.Column(db.ForeignKey('work_revision.id'), nullable=False, primary_key=True) +    work_rev= db.Column(db.ForeignKey('work_rev.id'), nullable=False, primary_key=True)      creator_ident_id = db.Column(db.ForeignKey('creator_ident.id'), nullable=False, primary_key=True)      type = db.Column(db.String, nullable=True)      stub = db.Column(db.String, nullable=True)      creator = db.relationship("CreatorIdent") -    work = db.relationship("WorkRevision") +    work = db.relationship("WorkRev")  class ReleaseContrib(db.Model):      __tablename__ = "release_contrib" -    release_rev = db.Column(db.ForeignKey('release_revision.id'), nullable=False, primary_key=True) +    release_rev = db.Column(db.ForeignKey('release_rev.id'), nullable=False, primary_key=True)      creator_ident_id = db.Column(db.ForeignKey('creator_ident.id'), nullable=False, primary_key=True)      type = db.Column(db.String, nullable=True)      stub = db.Column(db.String, nullable=True)      creator = db.relationship("CreatorIdent") -    release = db.relationship("ReleaseRevision") +    release = db.relationship("ReleaseRev")  class ReleaseRef(db.Model):      __tablename__ = "release_ref"      id = db.Column(db.Integer, primary_key=True, nullable=False) -    release_rev = db.Column(db.ForeignKey('release_revision.id'), nullable=False) +    release_rev = db.Column(db.ForeignKey('release_rev.id'), nullable=False)      target_release_ident_id = db.Column(db.ForeignKey('release_ident.id'), nullable=True)      index = db.Column(db.Integer, nullable=True)      stub = db.Column(db.String, nullable=True)      doi = db.Column(db.String, nullable=True) -    release = db.relationship("ReleaseRevision") +    release = db.relationship("ReleaseRev")      target = db.relationship("ReleaseIdent")  class FileRelease(db.Model):      __tablename__ = "file_release"      id = db.Column(db.Integer, primary_key=True, nullable=False) -    file_rev= db.Column(db.ForeignKey('file_revision.id'), nullable=False) +    file_rev= db.Column(db.ForeignKey('file_rev.id'), nullable=False)      release_ident_id = db.Column(db.ForeignKey('release_ident.id'), nullable=False)      release = db.relationship("ReleaseIdent") -    file = db.relationship("FileRevision") +    file = db.relationship("FileRev") + + +### Entities ################################################################# + +class WorkRev(db.Model): +    __tablename__ = 'work_rev' +    id = db.Column(db.Integer, primary_key=True) +    edit_id = db.Column(db.ForeignKey('work_edit.id')) +    extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) + +    title = db.Column(db.String) +    work_type = db.Column(db.String) +    primary_release_id = db.Column(db.ForeignKey('release_ident.id'), nullable=True) + +    creators = db.relationship('WorkContrib', lazy='subquery', +        backref=db.backref('works', lazy=True))  class WorkIdent(db.Model):      """ -    If revision_id is null, this was deleted. +    If rev_id is null, this was deleted.      If redirect_id is not null, this has been merged with the given id. In this -        case revision_id is a "cached" copy of the redirect's revision_id, as -        an optimization. If the merged work is "deleted", revision_id can be +        case rev_id is a "cached" copy of the redirect's rev_id, as +        an optimization. If the merged work is "deleted", rev_id can be          null and redirect_id not-null.      """      __tablename__ = 'work_ident'      id = db.Column(db.Integer, primary_key=True, nullable=False)      live = db.Column(db.Boolean, nullable=False, default=False) -    revision_id = db.Column(db.ForeignKey('work_revision.id'), nullable=True) +    rev_id = db.Column(db.ForeignKey('work_rev.id'), nullable=True)      redirect_id = db.Column(db.ForeignKey('work_ident.id'), nullable=True) -    revision = db.relationship("WorkRevision") - -class WorkLog(db.Model): -    __tablename__ = 'work_log' -    # ID is a monotonic int here; important for ordering! -    id = db.Column(db.Integer, primary_key=True, nullable=False) -    work_ident_id = db.Column(db.ForeignKey('work_ident.id'), nullable=False) -    #old_revision_id = db.Column(db.ForeignKey('work_revision.id'), nullable=True) -    #old_redirect_id = db.Column(db.ForeignKey('work_ident.id'), nullable=True) -    new_revision_id = db.Column(db.ForeignKey('work_revision.id'), nullable=True) -    new_redirect_id = db.Column(db.ForeignKey('work_ident.id'), nullable=True) -    # TODO: is this right? -    edit_id = db.Column(db.ForeignKey('edit.id')) +    revision = db.relationship("WorkRev")  class WorkEdit(db.Model):      __tablename__ = 'work_edit' -    id = db.Column(db.Integer, primary_key=True, autoincrement=True) +    id = db.Column(db.Integer, primary_key=True)      ident_id = db.Column(db.ForeignKey('work_ident.id'), nullable=True) -    revision_id = db.Column(db.ForeignKey('work_revision.id'), nullable=True) +    rev_id = db.Column(db.ForeignKey('work_rev.id'), nullable=True)      redirect_id = db.Column(db.ForeignKey('work_ident.id'), nullable=True)      edit_group = db.Column(db.ForeignKey('edit_group.id'), nullable=True) -    editor = db.Column(db.ForeignKey('editor.id'), nullable=False) -    comment = db.Column(db.String, nullable=True)      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) -class WorkRevision(db.Model): -    __tablename__ = 'work_revision' -    id = db.Column(db.Integer, primary_key=True) -    edit_id = db.Column(db.ForeignKey('edit.id')) -    extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) -    title = db.Column(db.String) -    work_type = db.Column(db.String) -    primary_release_id = db.Column(db.ForeignKey('release_ident.id'), nullable=True) - -    creators = db.relationship('WorkContrib', lazy='subquery', -        backref=db.backref('works', lazy=True)) - -class ReleaseIdent(db.Model): -    __tablename__ = 'release_ident' -    id = db.Column(db.Integer, primary_key=True) -    live = db.Column(db.Boolean, nullable=False, default=False) -    revision_id = db.Column(db.ForeignKey('release_revision.id')) -    redirect_id = db.Column(db.ForeignKey('release_ident.id'), nullable=True) -    revision = db.relationship("ReleaseRevision") - -class ReleaseRevision(db.Model): -    __tablename__ = 'release_revision' +class ReleaseRev(db.Model): +    __tablename__ = 'release_rev'      id = db.Column(db.Integer, primary_key=True, autoincrement=True) -    edit_id = db.Column(db.ForeignKey('edit.id')) +    edit_id = db.Column(db.ForeignKey('release_edit.id'))      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)      work_ident_id = db.ForeignKey('work_ident.id') @@ -137,37 +121,56 @@ class ReleaseRevision(db.Model):      creators = db.relationship('ReleaseContrib', lazy='subquery')      refs = db.relationship('ReleaseRef', lazy='subquery') -class CreatorIdent(db.Model): -    __tablename__ = 'creator_ident' +class ReleaseIdent(db.Model): +    __tablename__ = 'release_ident'      id = db.Column(db.Integer, primary_key=True)      live = db.Column(db.Boolean, nullable=False, default=False) -    revision_id = db.Column(db.ForeignKey('creator_revision.id')) -    redirect_id = db.Column(db.ForeignKey('creator_ident.id'), nullable=True) -    revision = db.relationship("CreatorRevision") +    rev_id = db.Column(db.ForeignKey('release_rev.id')) +    redirect_id = db.Column(db.ForeignKey('release_ident.id'), nullable=True) +    revision = db.relationship("ReleaseRev") + +class ReleaseEdit(db.Model): +    __tablename__ = 'release_edit' +    id = db.Column(db.Integer, primary_key=True) +    ident_id = db.Column(db.ForeignKey('release_ident.id'), nullable=True) +    rev_id = db.Column(db.ForeignKey('release_rev.id'), nullable=True) +    redirect_id = db.Column(db.ForeignKey('release_ident.id'), nullable=True) +    edit_group = db.Column(db.ForeignKey('edit_group.id'), nullable=True) +    extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) + -class CreatorRevision(db.Model): -    __tablename__ = 'creator_revision' +class CreatorRev(db.Model): +    __tablename__ = 'creator_rev'      id = db.Column(db.Integer, primary_key=True, autoincrement=True) -    edit_id = db.Column(db.ForeignKey('edit.id')) +    edit_id = db.Column(db.ForeignKey('creator_edit.id'))      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) -    #creator_ids = db.relationship("CreatorIdent", backref="revision", lazy=False)      name = db.Column(db.String)      sortname = db.Column(db.String)      orcid = db.Column(db.String)            # TODO: identifier table -class ContainerIdent(db.Model): -    __tablename__ = 'container_ident' +class CreatorIdent(db.Model): +    __tablename__ = 'creator_ident'      id = db.Column(db.Integer, primary_key=True)      live = db.Column(db.Boolean, nullable=False, default=False) -    revision_id = db.Column(db.ForeignKey('container_revision.id')) -    redirect_id = db.Column(db.ForeignKey('container_ident.id'), nullable=True) -    revision = db.relationship("ContainerRevision") +    rev_id = db.Column(db.ForeignKey('creator_rev.id')) +    redirect_id = db.Column(db.ForeignKey('creator_ident.id'), nullable=True) +    revision = db.relationship("CreatorRev") + +class CreatorEdit(db.Model): +    __tablename__ = 'creator_edit' +    id = db.Column(db.Integer, primary_key=True) +    ident_id = db.Column(db.ForeignKey('creator_ident.id'), nullable=True) +    rev_id = db.Column(db.ForeignKey('creator_rev.id'), nullable=True) +    redirect_id = db.Column(db.ForeignKey('creator_ident.id'), nullable=True) +    edit_group = db.Column(db.ForeignKey('edit_group.id'), nullable=True) +    extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) + -class ContainerRevision(db.Model): -    __tablename__ = 'container_revision' +class ContainerRev(db.Model): +    __tablename__ = 'container_rev'      id = db.Column(db.Integer, primary_key=True, autoincrement=True) -    edit_id = db.Column(db.ForeignKey('edit.id')) +    edit_id = db.Column(db.ForeignKey('container_edit.id'))      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)      name = db.Column(db.String) @@ -176,55 +179,88 @@ class ContainerRevision(db.Model):      sortname = db.Column(db.String)      issn = db.Column(db.String)             # TODO: identifier table -class FileIdent(db.Model): -    __tablename__ = 'file_ident' +class ContainerIdent(db.Model): +    __tablename__ = 'container_ident'      id = db.Column(db.Integer, primary_key=True)      live = db.Column(db.Boolean, nullable=False, default=False) -    revision_id = db.Column('revision', db.ForeignKey('file_revision.id')) -    redirect_id = db.Column(db.ForeignKey('file_ident.id'), nullable=True) -    revision = db.relationship("FileRevision") +    rev_id = db.Column(db.ForeignKey('container_rev.id')) +    redirect_id = db.Column(db.ForeignKey('container_ident.id'), nullable=True) +    revision = db.relationship("ContainerRev") -class FileRevision(db.Model): -    __tablename__ = 'file_revision' +class ContainerEdit(db.Model): +    __tablename__ = 'container_edit' +    id = db.Column(db.Integer, primary_key=True) +    ident_id = db.Column(db.ForeignKey('container_ident.id'), nullable=True) +    rev_id = db.Column(db.ForeignKey('container_rev.id'), nullable=True) +    redirect_id = db.Column(db.ForeignKey('container_ident.id'), nullable=True) +    edit_group = db.Column(db.ForeignKey('edit_group.id'), nullable=True) +    extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) + + +class FileRev(db.Model): +    __tablename__ = 'file_rev'      id = db.Column(db.Integer, primary_key=True, autoincrement=True) -    edit_id = db.Column(db.ForeignKey('edit.id')) +    edit_id = db.Column(db.ForeignKey('file_edit.id'))      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)      size = db.Column(db.Integer)      sha1 = db.Column(db.Integer)            # TODO: hash table... only or in addition?      url = db.Column(db.Integer)             # TODO: URL table -      releases = db.relationship('FileRelease', lazy='subquery') -class Edit(db.Model): -    __tablename__ = 'edit' -    id = db.Column(db.Integer, primary_key=True, autoincrement=True) +class FileIdent(db.Model): +    __tablename__ = 'file_ident' +    id = db.Column(db.Integer, primary_key=True) +    live = db.Column(db.Boolean, nullable=False, default=False) +    rev_id = db.Column('revision', db.ForeignKey('file_rev.id')) +    redirect_id = db.Column(db.ForeignKey('file_ident.id'), nullable=True) +    revision = db.relationship("FileRev") + +class FileEdit(db.Model): +    __tablename__ = 'file_edit' +    id = db.Column(db.Integer, primary_key=True) +    ident_id = db.Column(db.ForeignKey('file_ident.id'), nullable=True) +    rev_id = db.Column(db.ForeignKey('file_rev.id'), nullable=True) +    redirect_id = db.Column(db.ForeignKey('file_ident.id'), nullable=True)      edit_group = db.Column(db.ForeignKey('edit_group.id'), nullable=True) -    editor = db.Column(db.ForeignKey('editor.id'), nullable=False) -    comment = db.Column(db.String, nullable=True)      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) -    # WARNING: polymorphic. Represents the ident that should end up pointing to -    # this revision. -    entity_ident = db.Column(db.Integer, nullable=True) -    entity_rev = db.Column(db.Integer, nullable=True) -    entity_redirect = db.Column(db.Integer, nullable=True) + + +### Editing ################################################################# + +#class Edit(db.Model): +#    __tablename__ = 'edit' +#    id = db.Column(db.Integer, primary_key=True, autoincrement=True) +#    edit_group = db.Column(db.ForeignKey('edit_group.id'), nullable=True) +#    editor = db.Column(db.ForeignKey('editor.id'), nullable=False) +#    comment = db.Column(db.String, nullable=True) +#    extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) +#    # WARNING: polymorphic. Represents the ident that should end up pointing to +#    # this revision. +#    entity_ident = db.Column(db.Integer, nullable=True) +#    entity_rev = db.Column(db.Integer, nullable=True) +#    entity_redirect = db.Column(db.Integer, nullable=True)  class EditGroup(db.Model):      __tablename__ = 'edit_group'      id = db.Column(db.Integer, primary_key=True, autoincrement=True)      editor = db.Column(db.ForeignKey('editor.id'))      description = db.Column(db.String) +    editor = db.Column(db.ForeignKey('editor.id'), nullable=False)  class Editor(db.Model):      __tablename__ = 'editor'      id = db.Column(db.Integer, primary_key=True, autoincrement=True)      username = db.Column(db.String) +    group = db.Column(db.String) + + +### Other ###################################################################  class ChangelogEntry(db.Model): -    # XXX: remove this?      __tablename__= 'changelog'      id = db.Column(db.Integer, primary_key=True, autoincrement=True) -    edit_id = db.Column(db.ForeignKey('edit.id')) +    edit_group_id = db.Column(db.ForeignKey('edit_group.id'))      timestamp = db.Column(db.Integer)  class ExtraJson(db.Model): diff --git a/fatcat/sql.py b/fatcat/sql.py index f268d7be..782f7983 100644 --- a/fatcat/sql.py +++ b/fatcat/sql.py @@ -10,16 +10,16 @@ def populate_db():      TODO: doesn't create an edit trail (yet)      """ -    n_elkies = CreatorRevision( +    n_elkies = CreatorRev(          name="Noam D. Elkies",          sortname="Elkies, N",          orcid=None)      n_elkies_id = CreatorIdent(revision=n_elkies) -    pi_work = WorkRevision( +    pi_work = WorkRev(          title="Why is π^2 so close to 10?",          work_type="journal-article")      pi_work_id = WorkIdent(revision=pi_work) -    pi_release = ReleaseRevision( +    pi_release = ReleaseRev(          title=pi_work.title,          work_ident_id=pi_work.id,          release_type="journal-article") @@ -36,8 +36,8 @@ def populate_db():          pi_release_id])      # TODO: -    #ligo_collab = CreatorRevision(name="LIGO Scientific Collaboration") -    #ligo_paper = ReleaseRevision( +    #ligo_collab = CreatorRev(name="LIGO Scientific Collaboration") +    #ligo_paper = ReleaseRev(      #    title="Full Band All-sky Search for Periodic Gravitational Waves in the O1 LIGO Data")      db.session.commit() @@ -56,7 +56,7 @@ def populate_complex_db(count=100):      for _ in range(count):          first = random.choice(first_names)          last = random.choice(last_names) -        ar = CreatorRevision( +        ar = CreatorRev(              name="{} {}".format(first, last),              sortname="{}, {}".format(last, first[0]),              orcid=None) @@ -66,7 +66,7 @@ def populate_complex_db(count=100):      container_revs = []      container_ids = []      for _ in range(5): -        cr = ContainerRevision( +        cr = ContainerRev(              name="The Fake Journal of Stuff",              #container_id=None,              publisher="Big Paper", @@ -86,10 +86,10 @@ def populate_complex_db(count=100):      file_ids = []      for _ in range(count):          title = "{} {}".format(random.choice(title_start), random.choice(title_ends)) -        work = WorkRevision(title=title) +        work = WorkRev(title=title)          work_id = WorkIdent(revision=work)          authors = set(random.sample(author_ids, 5)) -        release = ReleaseRevision( +        release = ReleaseRev(              title=work.title,              creators=[ReleaseContrib(creator=a) for a in list(authors)],              #work=work, @@ -97,7 +97,7 @@ def populate_complex_db(count=100):          release_id = ReleaseIdent(revision=release)          work.primary_release = release          authors.add(random.choice(author_ids)) -        release2 = ReleaseRevision( +        release2 = ReleaseRev(              title=work.title + " (again)",              creators=[ReleaseContrib(creator=a) for a in list(authors)],              #work=work, @@ -112,7 +112,7 @@ def populate_complex_db(count=100):          file_content = str(random.random()) * random.randint(3,100)          file_sha = hashlib.sha1(file_content.encode('utf-8')).hexdigest() -        file_rev = FileRevision( +        file_rev = FileRev(              sha1=file_sha,              size=len(file_content),              url="http://archive.invalid/{}".format(file_sha), @@ -143,7 +143,7 @@ def add_crossref(meta):      author_revs = []      author_ids = []      for am in meta['author']: -        ar = CreatorRevision( +        ar = CreatorRev(              name="{} {}".format(am['given'], am['family']),              sortname="{}, {}".format(am['family'], am['given']),              orcid=None) @@ -151,7 +151,7 @@ def add_crossref(meta):          author_ids.append(CreatorIdent(revision=ar))      # container -    container = ContainerRevision( +    container = ContainerRev(          issn=meta['ISSN'][0],          name=meta['container-title'][0],          #container_id=None, @@ -160,9 +160,9 @@ def add_crossref(meta):      container_id = ContainerIdent(revision=container)      # work and release -    work = WorkRevision(title=title) +    work = WorkRev(title=title)      work_id = WorkIdent(revision=work) -    release = ReleaseRevision( +    release = ReleaseRev(          title=title,          creators=[ReleaseContrib(creator=a) for a in author_ids],          #work=work, @@ -210,7 +210,7 @@ def hydrate_work(wid):      hydro = {          "_type": "work",          "id": wid, -        "rev": work.revision_id, +        "rev": work.rev_id,          "is_live": work.live,          "redirect_id": work.redirect_id,      } @@ -243,7 +243,7 @@ def hydrate_release(rid):      return {          "_type": "release",          "id": rid, -        "revision": release.revision_id, +        "revision": release.rev_id,          "edit_id": release.revision.edit_id,          "is_live": release.live, | 
