diff options
| -rw-r--r-- | TODO | 17 | ||||
| -rw-r--r-- | fatcat/models.py | 23 | ||||
| -rw-r--r-- | fatcat/sql.py | 37 | 
3 files changed, 62 insertions, 15 deletions
| @@ -1,5 +1,20 @@ -helpers... ORM? +- tests are picking up config.py instead of using setUp() + +helpers/ORM and test +x files +x containers +- citations +x create from crossref dict (naive) +- "hydrated" get: release, creator, container +- populate_random_edit; push edit to live +- helpers with change logging (hook?) + +review +- should release point to work? I think not +- remove 'state' and 'redirect_id' from all revision tables  later: +- crossref json import script/benchmark +    => maybe both "raw" and string-dedupe?  - public IDs are UUID (sqlite hack?) diff --git a/fatcat/models.py b/fatcat/models.py index 4106b054..7927ca6e 100644 --- a/fatcat/models.py +++ b/fatcat/models.py @@ -6,6 +6,9 @@ states for identifiers:  - redirect: live, points to upstream rev, also points to redirect id      => if live and redirect non-null, all other fields copied from redirect target  - deleted: live, but doesn't point to a rev + +possible refactors: +- '_rev' instead of '_revision'  """  from fatcat import db @@ -31,6 +34,10 @@ release_ref = db.Table("release_ref",      db.Column("stub", db.String, nullable=True),      db.Column("doi", db.String, nullable=True)) +file_release = db.Table("file_release", +    db.Column("release_id", db.ForeignKey('release_id.id'), nullable=False, primary_key=True), +    db.Column("file_rev", db.ForeignKey('file_revision.id'), nullable=False, primary_key=True)) +  class WorkId(db.Model):      """      If revision_id is null, this was deleted. @@ -63,7 +70,6 @@ class WorkRevision(db.Model):      previous = db.Column(db.ForeignKey('work_revision.id'), nullable=True)      edit_id = db.Column(db.ForeignKey('edit.id'))      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) -    #work_ids = db.relationship("WorkId", backref="revision", lazy=True)      title = db.Column(db.String)      work_type = db.Column(db.String) @@ -85,10 +91,9 @@ class ReleaseRevision(db.Model):      redirect_id = db.Column(db.ForeignKey('release_id.id'), nullable=True)      edit_id = db.Column(db.ForeignKey('edit.id'))      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) -    #release_ids = db.relationship("ReleaseId", backref="revision", lazy=False)      work_id = db.ForeignKey('work_id.id') -    container = db.Column(db.ForeignKey('container_id.id'), nullable=True) +    container_id = db.Column(db.ForeignKey('container_id.id'), nullable=True)      title = db.Column(db.String, nullable=False)      license = db.Column(db.String, nullable=True)   # TODO: oa status foreign key      release_type = db.Column(db.String)             # TODO: foreign key @@ -139,7 +144,7 @@ class ContainerRevision(db.Model):      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)      name = db.Column(db.String) -    container = db.Column(db.ForeignKey('container_id.id')) +    container_id = db.Column(db.ForeignKey('container_id.id'))      publisher = db.Column(db.String)        # TODO: foreign key      sortname = db.Column(db.String)      issn = db.Column(db.String)             # TODO: identifier table @@ -153,7 +158,7 @@ class FileRevision(db.Model):      __tablename__ = 'file_revision'      id = db.Column(db.Integer, primary_key=True, autoincrement=True)      previous = db.Column(db.ForeignKey('file_revision.id'), nullable=True) -    state = db.Column(db.String) +    state = db.Column(db.String)            # TODO: what is this?      redirect_id = db.Column(db.ForeignKey('file_id.id'), nullable=True)      edit_id = db.Column(db.ForeignKey('edit.id'))      extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) @@ -162,11 +167,9 @@ class FileRevision(db.Model):      sha1 = db.Column(db.Integer)            # TODO: hash table... only or in addition?      url = db.Column(db.Integer)             # TODO: URL table -class ReleaseFile(db.Model): -    __tablename__ = 'release_file' -    id = db.Column(db.Integer, primary_key=True, autoincrement=True) -    release_rev = db.Column(db.ForeignKey('release_revision.id'), nullable=False) -    file_id = db.Column(db.ForeignKey('file_id.id'), nullable=False) +    releases = db.relationship('ReleaseId', secondary=file_release, +        lazy='subquery') +        #backref=db.backref('backrefs', lazy=True))  class Edit(db.Model):      __tablename__ = 'edit' diff --git a/fatcat/sql.py b/fatcat/sql.py index ace4b269..4b35c904 100644 --- a/fatcat/sql.py +++ b/fatcat/sql.py @@ -59,6 +59,18 @@ def populate_complex_db(count=100):          author_revs.append(ar)          author_ids.append(CreatorId(revision_id=ar.id)) +    container_revs = [] +    container_ids = [] +    for _ in range(5): +        cr = ContainerRevision( +            name="The Fake Journal of Stuff", +            container_id=None, +            publisher="Big Paper", +            sortname="Fake Journal of Stuff", +            issn="1234-5678") +        container_revs.append(cr) +        container_ids.append(ContainerId(revision_id=cr.id)) +      title_start = ("All about ", "When I grow up I want to be",          "The final word on", "Infinity: ", "The end of")      title_ends = ("Humankind", "Bees", "Democracy", "Avocados", "«küßî»", "“ЌύБЇ”") @@ -66,6 +78,8 @@ def populate_complex_db(count=100):      work_ids = []      release_revs = []      release_ids = [] +    file_revs = [] +    file_ids = []      for _ in range(count):          title = "{} {}".format(random.choice(title_start), random.choice(title_ends))          work = WorkRevision(title=title) @@ -74,14 +88,16 @@ def populate_complex_db(count=100):          release = ReleaseRevision(              title=work.title,              creators=list(authors), -            work_id=work.id) +            work_id=work.id, +            container_id=random.choice(container_ids).id)          release_id = ReleaseId(revision_id=release.id)          work.primary_release = release.id          authors.add(random.choice(author_ids))          release2 = ReleaseRevision(              title=work.title + " (again)",              creators=list(authors), -            work_id=work.id) +            work_id=work.id, +            container_id=random.choice(container_ids).id)          release_id2 = ReleaseId(revision_id=release2.id)          work_revs.append(work)          work_ids.append(work_id) @@ -90,12 +106,25 @@ def populate_complex_db(count=100):          release_ids.append(release_id)          release_ids.append(release_id2) +        file_content = str(random.random()) * random.randint(3,100) +        file_sha = hashlib.sha1(file_content.encode('utf-8')).hexdigest() +        file_rev = FileRevision( +            sha1=file_sha, +            size=len(file_content), +            url="http://archive.invalid/{}".format(file_sha), +            releases=[release_id, release_id2], +        ) +      db.session.add_all(author_revs)      db.session.add_all(author_ids)      db.session.add_all(work_revs)      db.session.add_all(work_ids)      db.session.add_all(release_revs)      db.session.add_all(release_ids) +    db.session.add_all(container_revs) +    db.session.add_all(container_ids) +    db.session.add_all(file_revs) +    db.session.add_all(file_ids)      db.session.commit() @@ -118,7 +147,7 @@ def add_crossref(meta):      container = ContainerRevision(          issn=meta['ISSN'][0],          name=meta['container-title'][0], -        container=None, +        container_id=None,          publisher=meta['publisher'],          sortname=meta['short-container-title'][0])      container_id = ContainerId(revision_id=container.id) @@ -130,7 +159,7 @@ def add_crossref(meta):          title=title,          creators=author_ids,          work_id=work.id, -        container=container_id.id, +        container_id=container_id.id,          release_type=meta['type'],          doi=meta['DOI'],          date=meta['created']['date-time'], | 
