aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-04-11 19:09:44 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2018-04-11 19:09:44 -0700
commit 1d2d4aaefb9380709aa5650bc09dc29ea7d348cb (patch)
tree 3f3a165a86f8ee566d9069977fa8d545bedcd708 /fatcat
parent c3ae6357b4824450263d727dc5d23b5cf0e9305f (diff)
download fatcat-1d2d4aaefb9380709aa5650bc09dc29ea7d348cb.tar.gz
download fatcat-1d2d4aaefb9380709aa5650bc09dc29ea7d348cb.zip
crude crossref import test
Diffstat (limited to 'fatcat')
-rw-r--r-- fatcat/models.py 32
-rw-r--r-- fatcat/sql.py 75
2 files changed, 95 insertions, 12 deletions
diff --git a/fatcat/models.py b/fatcat/models.py
index 214ff8ac..78d6b7f5 100644
--- a/fatcat/models.py
+++ b/fatcat/models.py
@@ -16,13 +16,22 @@ states for identifiers:
work_contrib = db.Table("work_contrib",
db.Column("work_rev", db.ForeignKey('work_revision.id'), nullable=False, primary_key=True),
db.Column("creator_id", db.ForeignKey('creator_id.id'), nullable=False, primary_key=True),
+ db.Column("type", db.String, nullable=True),
db.Column("stub", db.String, nullable=True))
release_contrib = db.Table("release_contrib",
db.Column("release_rev", db.ForeignKey('release_revision.id'), nullable=False, primary_key=True),
db.Column("creator_id", db.ForeignKey('creator_id.id'), nullable=False, primary_key=True),
+ db.Column("type", db.String, nullable=True),
db.Column("stub", db.String, nullable=True))
+release_ref = db.Table("release_ref",
+ db.Column("release_rev", db.ForeignKey('release_revision.id'), nullable=False),
+ db.Column("target_release_id", db.ForeignKey('release_id.id'), nullable=False),
+ db.Column("index", db.Integer, nullable=True),
+ db.Column("stub", db.String, nullable=True),
+ db.Column("doi", db.String, nullable=True))
+
class WorkId(db.Model):
"""
If revision_id is null, this was deleted.
@@ -73,23 +82,29 @@ class ReleaseRevision(db.Model):
__tablename__ = 'release_revision'
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
previous = db.Column(db.ForeignKey('release_revision.id'), nullable=True)
- state = db.Column(db.String) # TODO: enum
+ state = db.Column(db.String) # TODO: enum
redirect_id = db.Column(db.ForeignKey('release_id.id'), nullable=True)
edit_id = db.Column(db.ForeignKey('edit.id'))
extra_json = db.Column(db.ForeignKey('extra_json.sha1'), nullable=True)
#release_ids = db.relationship("ReleaseId", backref="revision", lazy=False)
work_id = db.ForeignKey('work_id.id')
- container = db.Column(db.ForeignKey('container_id.id'))
- title = db.Column(db.String)
- license = db.Column(db.String) # TODO: oa status foreign key
- release_type = db.Column(db.String) # TODO: foreign key
- date = db.Column(db.String) # TODO: datetime
- doi = db.Column(db.String) # TODO: identifier table
+ container = db.Column(db.ForeignKey('container_id.id'), nullable=True)
+ title = db.Column(db.String, nullable=False)
+ license = db.Column(db.String, nullable=True) # TODO: oa status foreign key
+ release_type = db.Column(db.String) # TODO: foreign key
+ date = db.Column(db.String, nullable=True) # TODO: datetime
+ doi = db.Column(db.String, nullable=True) # TODO: identifier table
+ volume = db.Column(db.String, nullable=True)
+ pages = db.Column(db.String, nullable=True)
+ issue = db.Column(db.String, nullable=True)
creators = db.relationship('CreatorId', secondary=release_contrib,
lazy='subquery')
#backref=db.backref('releases', lazy=True))
+ refs = db.relationship('ReleaseId', secondary=release_ref,
+ lazy='subquery')
+ #backref=db.backref('backrefs', lazy=True))
class CreatorId(db.Model):
__tablename__ = 'creator_id'
@@ -148,7 +163,7 @@ class FileRevision(db.Model):
sha1 = db.Column(db.Integer) # TODO: hash table... only or in addition?
url = db.Column(db.Integer) # TODO: URL table
-class ReleaseFil(db.Model):
+class ReleaseFile(db.Model):
__tablename__ = 'release_file'
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
release_rev = db.Column(db.ForeignKey('release_revision.id'), nullable=False)
@@ -178,6 +193,7 @@ class Editor(db.Model):
username = db.Column(db.String)
class ChangelogEntry(db.Model):
+ # XXX: remove this?
__tablename__= 'changelog'
id = db.Column(db.Integer, primary_key=True, autoincrement=True)
edit_id = db.Column(db.ForeignKey('edit.id'))
diff --git a/fatcat/sql.py b/fatcat/sql.py
index c6e1aa4e..ace4b269 100644
--- a/fatcat/sql.py
+++ b/fatcat/sql.py
@@ -1,6 +1,8 @@
+import json
import random
-from fatcat import app, db
+import hashlib
+from fatcat import db
from fatcat.models import *
def populate_db():
@@ -23,11 +25,13 @@ def populate_db():
pi_release_id = ReleaseId(revision_id=pi_release.id)
pi_work.primary_release = pi_release.id
+ # TODO:
#pi_file = File(
# sha1="efee52e46c86691e2b892dbeb212f3b92e92e9d3",
# url="http://www.math.harvard.edu/~elkies/Misc/pi10.pdf")
db.session.add_all([n_elkies, pi_work, pi_work_id, pi_release, pi_release_id])
+ # TODO:
#ligo_collab = CreatorRevision(name="LIGO Scientific Collaboration")
#ligo_paper = ReleaseRevision(
# title="Full Band All-sky Search for Periodic Gravitational Waves in the O1 LIGO Data")
@@ -45,7 +49,7 @@ def populate_complex_db(count=100):
author_revs = []
author_ids = []
- for i in range(count):
+ for _ in range(count):
first = random.choice(first_names)
last = random.choice(last_names)
ar = CreatorRevision(
@@ -57,12 +61,12 @@ def populate_complex_db(count=100):
title_start = ("All about ", "When I grow up I want to be",
"The final word on", "Infinity: ", "The end of")
- title_ends = ("Humankind", "Bees", "Democracy", "Avocados")
+ title_ends = ("Humankind", "Bees", "Democracy", "Avocados", "«küßî»", "“ЌύБЇ”")
work_revs = []
work_ids = []
release_revs = []
release_ids = []
- for i in range(count):
+ for _ in range(count):
title = "{} {}".format(random.choice(title_start), random.choice(title_ends))
work = WorkRevision(title=title)
work_id = WorkId(revision_id=work.id)
@@ -94,3 +98,66 @@ def populate_complex_db(count=100):
db.session.add_all(release_ids)
db.session.commit()
+
+def add_crossref(meta):
+
+ title = meta['title'][0]
+
+ # authors
+ author_revs = []
+ author_ids = []
+ for am in meta['author']:
+ ar = CreatorRevision(
+ name="{} {}".format(am['given'], am['family']),
+ sortname="{}, {}".format(am['family'], am['given']),
+ orcid=None)
+ author_revs.append(ar)
+ author_ids.append(CreatorId(revision_id=ar.id))
+
+ # container
+ container = ContainerRevision(
+ issn=meta['ISSN'][0],
+ name=meta['container-title'][0],
+ container=None,
+ publisher=meta['publisher'],
+ sortname=meta['short-container-title'][0])
+ container_id = ContainerId(revision_id=container.id)
+
+ # work and release
+ work = WorkRevision(title=title)
+ work_id = WorkId(revision_id=work.id)
+ release = ReleaseRevision(
+ title=title,
+ creators=author_ids,
+ work_id=work.id,
+ container=container_id.id,
+ release_type=meta['type'],
+ doi=meta['DOI'],
+ date=meta['created']['date-time'],
+ license=meta.get('license', [dict(URL=None)])[0]['URL'] or None,
+ issue=meta.get('issue', None),
+ volume=meta.get('volume', None),
+ pages=meta.get('page', None))
+ release_id = ReleaseId(revision_id=release.id)
+ work.primary_release = release.id
+ extra = json.dumps({
+ 'crossref': {
+ 'links': meta.get('link', []),
+ 'subject': meta['subject'],
+ 'type': meta['type'],
+ 'alternative-id': meta.get('alternative-id', []),
+ }
+ }, indent=None).encode('utf-8')
+ extra_json = ExtraJson(json=extra, sha1=hashlib.sha1(extra).hexdigest())
+ release.extra_json = extra_json.sha1
+
+ # references (TODO)
+ #refs = []
+ #for rm in meta['reference']:
+ # rm: author, volume, first-page, year, journal-title, DOI
+
+ db.session.add_all([work, work_id, release, release_id, container,
+ container_id, extra_json])
+ db.session.add_all(author_revs)
+ db.session.add_all(author_ids)
+ db.session.commit()