diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-04-11 19:09:44 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-04-11 19:09:44 -0700 |
commit | 1d2d4aaefb9380709aa5650bc09dc29ea7d348cb (patch) | |
tree | 3f3a165a86f8ee566d9069977fa8d545bedcd708 /fatcat | |
parent | c3ae6357b4824450263d727dc5d23b5cf0e9305f (diff) | |
download | fatcat-1d2d4aaefb9380709aa5650bc09dc29ea7d348cb.tar.gz fatcat-1d2d4aaefb9380709aa5650bc09dc29ea7d348cb.zip |
crude crossref import test
Diffstat (limited to 'fatcat')
-rw-r--r-- | fatcat/models.py | 32 | ||||
-rw-r--r-- | fatcat/sql.py | 75 |
2 files changed, 95 insertions, 12 deletions
diff --git a/fatcat/models.py b/fatcat/models.py index 214ff8ac..78d6b7f5 100644 --- a/fatcat/models.py +++ b/fatcat/models.py @@ -16,13 +16,22 @@ states for identifiers: work_contrib = db.Table("work_contrib", db.Column("work_rev", db.ForeignKey('work_revision.id'), nullable=False, primary_key=True), db.Column("creator_id", db.ForeignKey('creator_id.id'), nullable=False, primary_key=True), + db.Column("type", db.String, nullable=True), db.Column("stub", db.String, nullable=True)) release_contrib = db.Table("release_contrib", db.Column("release_rev", db.ForeignKey('release_revision.id'), nullable=False, primary_key=True), db.Column("creator_id", db.ForeignKey('creator_id.id'), nullable=False, primary_key=True), + db.Column("type", db.String, nullable=True), db.Column("stub", db.String, nullable=True)) +release_ref = db.Table("release_ref", + db.Column("release_rev", db.ForeignKey('release_revision.id'), nullable=False), + db.Column("target_release_id", db.ForeignKey('release_id.id'), nullable=False), + db.Column("index", db.Integer, nullable=True), + db.Column("stub", db.String, nullable=True), + db.Column("doi", db.String, nullable=True)) + class WorkId(db.Model): """ If revision_id is null, this was deleted. @@ -73,23 +82,29 @@ class ReleaseRevision(db.Model): __tablename__ = 'release_revision' id = db.Column(db.Integer, primary_key=True, autoincrement=True) previous = db.Column(db.ForeignKey('release_revision.id'), nullable=True) - state = db.Column(db.String) # TODO: enum + state = db.Column(db.String) # TODO: enum redirect_id = db.Column(db.ForeignKey('release_id.id'), nullable=True) edit_id = db.Column(db.ForeignKey('edit.id')) extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True) #release_ids = db.relationship("ReleaseId", backref="revision", lazy=False) work_id = db.ForeignKey('work_id.id') - container = db.Column(db.ForeignKey('container_id.id')) - title = db.Column(db.String) - license = db.Column(db.String) # TODO: oa status foreign key - release_type = db.Column(db.String) # TODO: foreign key - date = db.Column(db.String) # TODO: datetime - doi = db.Column(db.String) # TODO: identifier table + container = db.Column(db.ForeignKey('container_id.id'), nullable=True) + title = db.Column(db.String, nullable=False) + license = db.Column(db.String, nullable=True) # TODO: oa status foreign key + release_type = db.Column(db.String) # TODO: foreign key + date = db.Column(db.String, nullable=True) # TODO: datetime + doi = db.Column(db.String, nullable=True) # TODO: identifier table + volume = db.Column(db.String, nullable=True) + pages = db.Column(db.String, nullable=True) + issue = db.Column(db.String, nullable=True) creators = db.relationship('CreatorId', secondary=release_contrib, lazy='subquery') #backref=db.backref('releases', lazy=True)) + refs = db.relationship('ReleaseId', secondary=release_ref, + lazy='subquery') + #backref=db.backref('backrefs', lazy=True)) class CreatorId(db.Model): __tablename__ = 'creator_id' @@ -148,7 +163,7 @@ class FileRevision(db.Model): sha1 = db.Column(db.Integer) # TODO: hash table... only or in addition? url = db.Column(db.Integer) # TODO: URL table -class ReleaseFil(db.Model): +class ReleaseFile(db.Model): __tablename__ = 'release_file' id = db.Column(db.Integer, primary_key=True, autoincrement=True) release_rev = db.Column(db.ForeignKey('release_revision.id'), nullable=False) @@ -178,6 +193,7 @@ class Editor(db.Model): username = db.Column(db.String) class ChangelogEntry(db.Model): + # XXX: remove this? __tablename__= 'changelog' id = db.Column(db.Integer, primary_key=True, autoincrement=True) edit_id = db.Column(db.ForeignKey('edit.id')) diff --git a/fatcat/sql.py b/fatcat/sql.py index c6e1aa4e..ace4b269 100644 --- a/fatcat/sql.py +++ b/fatcat/sql.py @@ -1,6 +1,8 @@ +import json import random -from fatcat import app, db +import hashlib +from fatcat import db from fatcat.models import * def populate_db(): @@ -23,11 +25,13 @@ def populate_db(): pi_release_id = ReleaseId(revision_id=pi_release.id) pi_work.primary_release = pi_release.id + # TODO: #pi_file = File( # sha1="efee52e46c86691e2b892dbeb212f3b92e92e9d3", # url="http://www.math.harvard.edu/~elkies/Misc/pi10.pdf") db.session.add_all([n_elkies, pi_work, pi_work_id, pi_release, pi_release_id]) + # TODO: #ligo_collab = CreatorRevision(name="LIGO Scientific Collaboration") #ligo_paper = ReleaseRevision( # title="Full Band All-sky Search for Periodic Gravitational Waves in the O1 LIGO Data") @@ -45,7 +49,7 @@ def populate_complex_db(count=100): author_revs = [] author_ids = [] - for i in range(count): + for _ in range(count): first = random.choice(first_names) last = random.choice(last_names) ar = CreatorRevision( @@ -57,12 +61,12 @@ def populate_complex_db(count=100): title_start = ("All about ", "When I grow up I want to be", "The final word on", "Infinity: ", "The end of") - title_ends = ("Humankind", "Bees", "Democracy", "Avocados") + title_ends = ("Humankind", "Bees", "Democracy", "Avocados", "«küßî»", "“ЌύБЇ”") work_revs = [] work_ids = [] release_revs = [] release_ids = [] - for i in range(count): + for _ in range(count): title = "{} {}".format(random.choice(title_start), random.choice(title_ends)) work = WorkRevision(title=title) work_id = WorkId(revision_id=work.id) @@ -94,3 +98,66 @@ def populate_complex_db(count=100): db.session.add_all(release_ids) db.session.commit() + +def add_crossref(meta): + + title = meta['title'][0] + + # authors + author_revs = [] + author_ids = [] + for am in meta['author']: + ar = CreatorRevision( + name="{} {}".format(am['given'], am['family']), + sortname="{}, {}".format(am['family'], am['given']), + orcid=None) + author_revs.append(ar) + author_ids.append(CreatorId(revision_id=ar.id)) + + # container + container = ContainerRevision( + issn=meta['ISSN'][0], + name=meta['container-title'][0], + container=None, + publisher=meta['publisher'], + sortname=meta['short-container-title'][0]) + container_id = ContainerId(revision_id=container.id) + + # work and release + work = WorkRevision(title=title) + work_id = WorkId(revision_id=work.id) + release = ReleaseRevision( + title=title, + creators=author_ids, + work_id=work.id, + container=container_id.id, + release_type=meta['type'], + doi=meta['DOI'], + date=meta['created']['date-time'], + license=meta.get('license', [dict(URL=None)])[0]['URL'] or None, + issue=meta.get('issue', None), + volume=meta.get('volume', None), + pages=meta.get('page', None)) + release_id = ReleaseId(revision_id=release.id) + work.primary_release = release.id + extra = json.dumps({ + 'crossref': { + 'links': meta.get('link', []), + 'subject': meta['subject'], + 'type': meta['type'], + 'alternative-id': meta.get('alternative-id', []), + } + }, indent=None).encode('utf-8') + extra_json = ExtraJson(json=extra, sha1=hashlib.sha1(extra).hexdigest()) + release.extra_json = extra_json.sha1 + + # references (TODO) + #refs = [] + #for rm in meta['reference']: + # rm: author, volume, first-page, year, journal-title, DOI + + db.session.add_all([work, work_id, release, release_id, container, + container_id, extra_json]) + db.session.add_all(author_revs) + db.session.add_all(author_ids) + db.session.commit() |