diff options
-rw-r--r-- | TODO | 17 | ||||
-rw-r--r-- | fatcat/models.py | 23 | ||||
-rw-r--r-- | fatcat/sql.py | 37 |
3 files changed, 62 insertions, 15 deletions
@@ -1,5 +1,20 @@ -helpers... ORM? +- tests are picking up config.py instead of using setUp() + +helpers/ORM and test +x files +x containers +- citations +x create from crossref dict (naive) +- "hydrated" get: release, creator, container +- populate_random_edit; push edit to live +- helpers with change logging (hook?) + +review +- should release point to work? I think not +- remove 'state' and 'redirect_id' from all revision tables later: +- crossref json import script/benchmark + => maybe both "raw" and string-dedupe? - public IDs are UUID (sqlite hack?) diff --git a/fatcat/models.py b/fatcat/models.py index 4106b054..7927ca6e 100644 --- a/fatcat/models.py +++ b/fatcat/models.py @@ -6,6 +6,9 @@ states for identifiers: - redirect: live, points to upstream rev, also points to redirect id => if live and redirect non-null, all other fields copied from redirect target - deleted: live, but doesn't point to a rev + +possible refactors: +- '_rev' instead of '_revision' """ from fatcat import db @@ -31,6 +34,10 @@ release_ref = db.Table("release_ref", db.Column("stub", db.String, nullable=True), db.Column("doi", db.String, nullable=True)) +file_release = db.Table("file_release", + db.Column("release_id", db.ForeignKey('release_id.id'), nullable=False, primary_key=True), + db.Column("file_rev", db.ForeignKey('file_revision.id'), nullable=False, primary_key=True)) + class WorkId(db.Model): """ If revision_id is null, this was deleted. @@ -63,7 +70,6 @@ class WorkRevision(db.Model): previous = db.Column(db.ForeignKey('work_revision.id'), nullable=True) edit_id = db.Column(db.ForeignKey('edit.id')) extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) - #work_ids = db.relationship("WorkId", backref="revision", lazy=True) title = db.Column(db.String) work_type = db.Column(db.String) @@ -85,10 +91,9 @@ class ReleaseRevision(db.Model): redirect_id = db.Column(db.ForeignKey('release_id.id'), nullable=True) edit_id = db.Column(db.ForeignKey('edit.id')) extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) - #release_ids = db.relationship("ReleaseId", backref="revision", lazy=False) work_id = db.ForeignKey('work_id.id') - container = db.Column(db.ForeignKey('container_id.id'), nullable=True) + container_id = db.Column(db.ForeignKey('container_id.id'), nullable=True) title = db.Column(db.String, nullable=False) license = db.Column(db.String, nullable=True) # TODO: oa status foreign key release_type = db.Column(db.String) # TODO: foreign key @@ -139,7 +144,7 @@ class ContainerRevision(db.Model): extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) name = db.Column(db.String) - container = db.Column(db.ForeignKey('container_id.id')) + container_id = db.Column(db.ForeignKey('container_id.id')) publisher = db.Column(db.String) # TODO: foreign key sortname = db.Column(db.String) issn = db.Column(db.String) # TODO: identifier table @@ -153,7 +158,7 @@ class FileRevision(db.Model): __tablename__ = 'file_revision' id = db.Column(db.Integer, primary_key=True, autoincrement=True) previous = db.Column(db.ForeignKey('file_revision.id'), nullable=True) - state = db.Column(db.String) + state = db.Column(db.String) # TODO: what is this? redirect_id = db.Column(db.ForeignKey('file_id.id'), nullable=True) edit_id = db.Column(db.ForeignKey('edit.id')) extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) @@ -162,11 +167,9 @@ class FileRevision(db.Model): sha1 = db.Column(db.Integer) # TODO: hash table... only or in addition? url = db.Column(db.Integer) # TODO: URL table -class ReleaseFile(db.Model): - __tablename__ = 'release_file' - id = db.Column(db.Integer, primary_key=True, autoincrement=True) - release_rev = db.Column(db.ForeignKey('release_revision.id'), nullable=False) - file_id = db.Column(db.ForeignKey('file_id.id'), nullable=False) + releases = db.relationship('ReleaseId', secondary=file_release, + lazy='subquery') + #backref=db.backref('backrefs', lazy=True)) class Edit(db.Model): __tablename__ = 'edit' diff --git a/fatcat/sql.py b/fatcat/sql.py index ace4b269..4b35c904 100644 --- a/fatcat/sql.py +++ b/fatcat/sql.py @@ -59,6 +59,18 @@ def populate_complex_db(count=100): author_revs.append(ar) author_ids.append(CreatorId(revision_id=ar.id)) + container_revs = [] + container_ids = [] + for _ in range(5): + cr = ContainerRevision( + name="The Fake Journal of Stuff", + container_id=None, + publisher="Big Paper", + sortname="Fake Journal of Stuff", + issn="1234-5678") + container_revs.append(cr) + container_ids.append(ContainerId(revision_id=cr.id)) + title_start = ("All about ", "When I grow up I want to be", "The final word on", "Infinity: ", "The end of") title_ends = ("Humankind", "Bees", "Democracy", "Avocados", "«küßî»", "“ЌύБЇ”") @@ -66,6 +78,8 @@ def populate_complex_db(count=100): work_ids = [] release_revs = [] release_ids = [] + file_revs = [] + file_ids = [] for _ in range(count): title = "{} {}".format(random.choice(title_start), random.choice(title_ends)) work = WorkRevision(title=title) @@ -74,14 +88,16 @@ def populate_complex_db(count=100): release = ReleaseRevision( title=work.title, creators=list(authors), - work_id=work.id) + work_id=work.id, + container_id=random.choice(container_ids).id) release_id = ReleaseId(revision_id=release.id) work.primary_release = release.id authors.add(random.choice(author_ids)) release2 = ReleaseRevision( title=work.title + " (again)", creators=list(authors), - work_id=work.id) + work_id=work.id, + container_id=random.choice(container_ids).id) release_id2 = ReleaseId(revision_id=release2.id) work_revs.append(work) work_ids.append(work_id) @@ -90,12 +106,25 @@ def populate_complex_db(count=100): release_ids.append(release_id) release_ids.append(release_id2) + file_content = str(random.random()) * random.randint(3,100) + file_sha = hashlib.sha1(file_content.encode('utf-8')).hexdigest() + file_rev = FileRevision( + sha1=file_sha, + size=len(file_content), + url="http://archive.invalid/{}".format(file_sha), + releases=[release_id, release_id2], + ) + db.session.add_all(author_revs) db.session.add_all(author_ids) db.session.add_all(work_revs) db.session.add_all(work_ids) db.session.add_all(release_revs) db.session.add_all(release_ids) + db.session.add_all(container_revs) + db.session.add_all(container_ids) + db.session.add_all(file_revs) + db.session.add_all(file_ids) db.session.commit() @@ -118,7 +147,7 @@ def add_crossref(meta): container = ContainerRevision( issn=meta['ISSN'][0], name=meta['container-title'][0], - container=None, + container_id=None, publisher=meta['publisher'], sortname=meta['short-container-title'][0]) container_id = ContainerId(revision_id=container.id) @@ -130,7 +159,7 @@ def add_crossref(meta): title=title, creators=author_ids, work_id=work.id, - container=container_id.id, + container_id=container_id.id, release_type=meta['type'], doi=meta['DOI'], date=meta['created']['date-time'], |