aboutsummaryrefslogtreecommitdiffstats
path: root/extra/journal_metadata
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-07-26 15:49:13 -0700
committerBryan Newbold <bnewbold@robocracy.org>2019-07-26 15:49:13 -0700
commit568edcb7c1d7b4d52732b66c584576030b508491 (patch)
tree2e97eb075cdd2b7e731158c434e717d6dcfa7a2b /extra/journal_metadata
parentac4db073e0305e864778a306309f073d94524886 (diff)
downloadfatcat-568edcb7c1d7b4d52732b66c584576030b508491.tar.gz
fatcat-568edcb7c1d7b4d52732b66c584576030b508491.zip
shorter/simpler table names
Diffstat (limited to 'extra/journal_metadata')
-rwxr-xr-xextra/journal_metadata/chocula.py22
-rw-r--r--extra/journal_metadata/chocula_schema.sql4
2 files changed, 17 insertions, 9 deletions
diff --git a/extra/journal_metadata/chocula.py b/extra/journal_metadata/chocula.py
index 50dff49f..8125a8b9 100755
--- a/extra/journal_metadata/chocula.py
+++ b/extra/journal_metadata/chocula.py
@@ -371,7 +371,7 @@ class ChoculaDatabase():
extra = None
try:
- self.c.execute("INSERT INTO journal_index VALUES (?,?,?,?,?,?)",
+ self.c.execute("INSERT INTO directory VALUES (?,?,?,?,?,?)",
(issnl, index_slug, identifier, name, None, extra))
status = 'inserted'
except sqlite3.IntegrityError as ie:
@@ -671,7 +671,7 @@ class ChoculaDatabase():
path = args.input_file or GOLD_OA_FILE
print("##### Loading GOLD OA...")
# "ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE"
- reader = csv.DictReader(open(path))
+ reader = csv.DictReader(open(path, encoding="ISO-8859-1"))
counts = Counter()
self.c = self.db.cursor()
for row in reader:
@@ -683,11 +683,18 @@ class ChoculaDatabase():
extra['in_' + ind.lower()] = bool(int(row['JOURNAL_IN_' + ind]))
issnl, status = self.add_issn(
'gold_oa',
- issne=row['ISSN_L'],
+ raw_issn=row['ISSN_L'],
name=row['TITLE'],
extra=extra,
)
counts[status] += 1
+ # also add for other non-direct indices
+ for ind in ('OAPC', 'WOS', 'SCOPUS'):
+ issnl, status = self.add_issn(
+ ind.lower(),
+ raw_issn=row['ISSN_L'],
+ name=row['TITLE'],
+ )
self.c.close()
self.db.commit()
print(counts)
@@ -928,7 +935,7 @@ class ChoculaDatabase():
counts = Counter()
self.c = self.db.cursor()
self.db.row_factory = sqlite3.Row
- index_issnls = list(self.c.execute('SELECT DISTINCT issnl FROM journal_index'))
+ index_issnls = list(self.c.execute('SELECT DISTINCT issnl FROM directory'))
fatcat_issnls = list(self.c.execute('SELECT DISTINCT issnl FROM fatcat_container'))
all_issnls = set([i[0] for i in index_issnls + fatcat_issnls])
print("{} total ISSN-Ls".format(len(all_issnls)))
@@ -948,7 +955,7 @@ class ChoculaDatabase():
any_preservation = bool(frow['preserved_count'])
any_ia = bool(frow['ia_count'])
- cur = self.db.execute("SELECT * FROM journal_index WHERE issnl = ?;", [issnl])
+ cur = self.db.execute("SELECT * FROM directory WHERE issnl = ?;", [issnl])
for irow in cur:
if irow['slug'] in ('crossref',):
out['has_dois'] = True
@@ -970,7 +977,7 @@ class ChoculaDatabase():
if hrow['terminal_status_code'] == 200:
out['any_live_homepage'] = True
- self.c.execute("INSERT OR REPLACE INTO journal_summary (issnl, issne, issnp, fatcat_ident, name, publisher, country, lang, is_oa, is_longtail, has_dois, any_live_homepage, any_preservation, any_ia) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
+ self.c.execute("INSERT OR REPLACE INTO journal (issnl, issne, issnp, fatcat_ident, name, publisher, country, lang, is_oa, is_longtail, has_dois, any_live_homepage, any_preservation, any_ia) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
(issnl, out.get('issne'), out.get('issnp'),
out.get('fatcat_ident'), out.get('name'),
out.get('publisher'), out.get('country'), out.get('lang'),
@@ -990,6 +997,7 @@ class ChoculaDatabase():
self.index_road(args)
self.index_entrez(args)
self.index_ezb(args)
+ self.index_gold_oa(args)
self.load_fatcat(args)
self.load_fatcat_stats(args)
self.update_url_status(args)
@@ -1037,7 +1045,7 @@ def main():
sub.set_defaults(func='summarize')
# TODO: 'jurn'
- for ind in ('doaj', 'road', 'crossref', 'entrez', 'norwegian', 'szczepanski', 'ezb'):
+ for ind in ('doaj', 'road', 'crossref', 'entrez', 'norwegian', 'szczepanski', 'ezb', 'gold_oa'):
sub = subparsers.add_parser('index_{}'.format(ind))
sub.set_defaults(func='index_{}'.format(ind))
diff --git a/extra/journal_metadata/chocula_schema.sql b/extra/journal_metadata/chocula_schema.sql
index 015b46b7..25a2ae33 100644
--- a/extra/journal_metadata/chocula_schema.sql
+++ b/extra/journal_metadata/chocula_schema.sql
@@ -1,6 +1,6 @@
-CREATE TABLE IF NOT EXISTS journal_summary
+CREATE TABLE IF NOT EXISTS journal
(issnl TEXT NOT NULL PRIMARY KEY,
issne TEXT,
issnp TEXT,
@@ -22,7 +22,7 @@ CREATE TABLE IF NOT EXISTS journal_summary
any_ia BOOLEAN
);
-CREATE TABLE IF NOT EXISTS journal_index
+CREATE TABLE IF NOT EXISTS directory
(issnl TEXT NOT NULL,
slug TEXT NOT NULL,
identifier TEXT,