about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-04-11 19:46:46 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-04-11 19:46:46 -0700
commit: 2eb4e18024d6a7dc4b39e5c1f499a1475acc0e4c (patch)
tree: 009cd8ca802e9c495692e940629b4a9220eb88c4
parent: 6048d2252d94dc287f56d6adc8e5ae4bf21b06a0 (diff)
download: fatcat-2eb4e18024d6a7dc4b39e5c1f499a1475acc0e4c.tar.gz
          fatcat-2eb4e18024d6a7dc4b39e5c1f499a1475acc0e4c.zip
files and containers!
-rw-r--r-- TODO 17
-rw-r--r-- fatcat/models.py 23
-rw-r--r-- fatcat/sql.py 37
3 files changed, 62 insertions, 15 deletions
diff --git a/TODO b/TODO
index 8ce67499..db059f15 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,20 @@
-helpers... ORM?
+- tests are picking up config.py instead of using setUp()
+
+helpers/ORM and test
+x files
+x containers
+- citations
+x create from crossref dict (naive)
+- "hydrated" get: release, creator, container
+- populate_random_edit; push edit to live
+- helpers with change logging (hook?)
+
+review
+- should release point to work? I think not
+- remove 'state' and 'redirect_id' from all revision tables
later:
+- crossref json import script/benchmark
+ => maybe both "raw" and string-dedupe?
- public IDs are UUID (sqlite hack?)
diff --git a/fatcat/models.py b/fatcat/models.py
index 4106b054..7927ca6e 100644
--- a/fatcat/models.py
+++ b/fatcat/models.py
@@ -6,6 +6,9 @@ states for identifiers:
- redirect: live, points to upstream rev, also points to redirect id
=> if live and redirect non-null, all other fields copied from redirect target
- deleted: live, but doesn't point to a rev
+
+possible refactors:
+- '_rev' instead of '_revision'
"""
from fatcat import db
@@ -31,6 +34,10 @@ release_ref = db.Table("release_ref",
db.Column("stub", db.String, nullable=True),
db.Column("doi", db.String, nullable=True))
+file_release = db.Table("file_release",
+ db.Column("release_id", db.ForeignKey('release_id.id'), nullable=False, primary_key=True),
+ db.Column("file_rev", db.ForeignKey('file_revision.id'), nullable=False, primary_key=True))
+
class WorkId(db.Model):
"""
If revision_id is null, this was deleted.
@@ -63,7 +70,6 @@ class WorkRevision(db.Model):
previous = db.Column(db.ForeignKey('work_revision.id'), nullable=True)
edit_id = db.Column(db.ForeignKey('edit.id'))
extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)
- #work_ids = db.relationship("WorkId", backref="revision", lazy=True)
title = db.Column(db.String)
work_type = db.Column(db.String)
@@ -85,10 +91,9 @@ class ReleaseRevision(db.Model):
redirect_id = db.Column(db.ForeignKey('release_id.id'), nullable=True)
edit_id = db.Column(db.ForeignKey('edit.id'))
extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)
- #release_ids = db.relationship("ReleaseId", backref="revision", lazy=False)
work_id = db.ForeignKey('work_id.id')
- container = db.Column(db.ForeignKey('container_id.id'), nullable=True)
+ container_id = db.Column(db.ForeignKey('container_id.id'), nullable=True)
title = db.Column(db.String, nullable=False)
license = db.Column(db.String, nullable=True) # TODO: oa status foreign key
release_type = db.Column(db.String) # TODO: foreign key
@@ -139,7 +144,7 @@ class ContainerRevision(db.Model):
extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)
name = db.Column(db.String)
- container = db.Column(db.ForeignKey('container_id.id'))
+ container_id = db.Column(db.ForeignKey('container_id.id'))
publisher = db.Column(db.String) # TODO: foreign key
sortname = db.Column(db.String)
issn = db.Column(db.String) # TODO: identifier table
@@ -153,7 +158,7 @@ class FileRevision(db.Model):
__tablename__ = 'file_revision'
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
previous = db.Column(db.ForeignKey('file_revision.id'), nullable=True)
- state = db.Column(db.String)
+ state = db.Column(db.String) # TODO: what is this?
redirect_id = db.Column(db.ForeignKey('file_id.id'), nullable=True)
edit_id = db.Column(db.ForeignKey('edit.id'))
extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)
@@ -162,11 +167,9 @@ class FileRevision(db.Model):
sha1 = db.Column(db.Integer) # TODO: hash table... only or in addition?
url = db.Column(db.Integer) # TODO: URL table
-class ReleaseFile(db.Model):
- __tablename__ = 'release_file'
- id = db.Column(db.Integer, primary_key=True, autoincrement=True)
- release_rev = db.Column(db.ForeignKey('release_revision.id'), nullable=False)
- file_id = db.Column(db.ForeignKey('file_id.id'), nullable=False)
+ releases = db.relationship('ReleaseId', secondary=file_release,
+ lazy='subquery')
+ #backref=db.backref('backrefs', lazy=True))
class Edit(db.Model):
__tablename__ = 'edit'
diff --git a/fatcat/sql.py b/fatcat/sql.py
index ace4b269..4b35c904 100644
--- a/fatcat/sql.py
+++ b/fatcat/sql.py
@@ -59,6 +59,18 @@ def populate_complex_db(count=100):
author_revs.append(ar)
author_ids.append(CreatorId(revision_id=ar.id))
+ container_revs = []
+ container_ids = []
+ for _ in range(5):
+ cr = ContainerRevision(
+ name="The Fake Journal of Stuff",
+ container_id=None,
+ publisher="Big Paper",
+ sortname="Fake Journal of Stuff",
+ issn="1234-5678")
+ container_revs.append(cr)
+ container_ids.append(ContainerId(revision_id=cr.id))
+
title_start = ("All about ", "When I grow up I want to be",
"The final word on", "Infinity: ", "The end of")
title_ends = ("Humankind", "Bees", "Democracy", "Avocados", "«küßî»", "“ЌύБЇ”")
@@ -66,6 +78,8 @@ def populate_complex_db(count=100):
work_ids = []
release_revs = []
release_ids = []
+ file_revs = []
+ file_ids = []
for _ in range(count):
title = "{} {}".format(random.choice(title_start), random.choice(title_ends))
work = WorkRevision(title=title)
@@ -74,14 +88,16 @@ def populate_complex_db(count=100):
release = ReleaseRevision(
title=work.title,
creators=list(authors),
- work_id=work.id)
+ work_id=work.id,
+ container_id=random.choice(container_ids).id)
release_id = ReleaseId(revision_id=release.id)
work.primary_release = release.id
authors.add(random.choice(author_ids))
release2 = ReleaseRevision(
title=work.title + " (again)",
creators=list(authors),
- work_id=work.id)
+ work_id=work.id,
+ container_id=random.choice(container_ids).id)
release_id2 = ReleaseId(revision_id=release2.id)
work_revs.append(work)
work_ids.append(work_id)
@@ -90,12 +106,25 @@ def populate_complex_db(count=100):
release_ids.append(release_id)
release_ids.append(release_id2)
+ file_content = str(random.random()) * random.randint(3,100)
+ file_sha = hashlib.sha1(file_content.encode('utf-8')).hexdigest()
+ file_rev = FileRevision(
+ sha1=file_sha,
+ size=len(file_content),
+ url="http://archive.invalid/{}".format(file_sha),
+ releases=[release_id, release_id2],
+ )
+
db.session.add_all(author_revs)
db.session.add_all(author_ids)
db.session.add_all(work_revs)
db.session.add_all(work_ids)
db.session.add_all(release_revs)
db.session.add_all(release_ids)
+ db.session.add_all(container_revs)
+ db.session.add_all(container_ids)
+ db.session.add_all(file_revs)
+ db.session.add_all(file_ids)
db.session.commit()
@@ -118,7 +147,7 @@ def add_crossref(meta):
container = ContainerRevision(
issn=meta['ISSN'][0],
name=meta['container-title'][0],
- container=None,
+ container_id=None,
publisher=meta['publisher'],
sortname=meta['short-container-title'][0])
container_id = ContainerId(revision_id=container.id)
@@ -130,7 +159,7 @@ def add_crossref(meta):
title=title,
creators=author_ids,
work_id=work.id,
- container=container_id.id,
+ container_id=container_id.id,
release_type=meta['type'],
doi=meta['DOI'],
date=meta['created']['date-time'],