diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 11:32:41 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 11:32:41 -0800 |
commit | 279b22e30d9b590838268f5f5acdaa1110ee593a (patch) | |
tree | c9965a089be1b8ef607573ea9261c0c378c0ab47 /python/fatcat_tools/issn_importer.py | |
parent | 7ebda2e051b51e49544ab75673b19ec5f27d9d45 (diff) | |
download | fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.tar.gz fatcat-279b22e30d9b590838268f5f5acdaa1110ee593a.zip |
shuffle around fatcat_tools layout
Diffstat (limited to 'python/fatcat_tools/issn_importer.py')
-rw-r--r-- | python/fatcat_tools/issn_importer.py | 72 |
1 files changed, 0 insertions, 72 deletions
```diff
diff --git a/python/fatcat_tools/issn_importer.py b/python/fatcat_tools/issn_importer.py
deleted file mode 100644
index e3ed7382..00000000
--- a/python/fatcat_tools/issn_importer.py
+++ /dev/null
@@ -1,72 +0,0 @@
-
-import sys
-import json
-import itertools
-import fatcat_client
-from fatcat_tools.importer_common import FatcatImporter
-
-# CSV format (generated from git.archive.org/webgroup/oa-journal-analysis):
-# ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count
-
-def or_none(s):
-    if s is None:
-        return None
-    if len(s) == 0:
-        return None
-    return s
-
-def truthy(s):
-    if s is None:
-        return None
-    s = s.lower()
-    if s in ('true', 't', 'yes', 'y', '1'):
-        return True
-    elif s in ('false', 'f', 'no', 'n', '0'):
-        return False
-    else:
-        return None
-
-class FatcatIssnImporter(FatcatImporter):
-
-    def parse_issn_row(self, row):
-        """
-        row is a python dict (parsed from CSV).
-        returns a ContainerEntity
-        """
-        title = or_none(row['title'])
-        issnl = or_none(row['ISSN-L'])
-        if title is None or issnl is None:
-            return
-        extra = dict(
-            in_doaj=truthy(row['in_doaj']),
-            in_road=truthy(row['in_road']),
-            in_norwegian=truthy(row['in_norwegian']),
-            language=or_none(row['lang']),
-            url=or_none(row['url']),
-            ISSNp=or_none(row['ISSN-print']),
-            ISSNe=or_none(row['ISSN-electronic']),
-            is_oa=truthy(row['is_oa']),
-            is_kept=truthy(row['is_kept']),
-        )
-        ce = fatcat_client.ContainerEntity(
-            issnl=issnl,
-            name=title,
-            publisher=or_none(row['publisher']),
-            abbrev=None,
-            coden=None,
-            extra=extra)
-        return ce
-
-    def create_row(self, row, editgroup=None):
-        ce = self.parse_issn_row(row)
-        if ce is not None:
-            self.api.create_container(ce, editgroup=editgroup)
-            self.insert_count = self.insert_count + 1
-
-    def create_batch(self, batch, editgroup=None):
-        """Reads and processes in batches (not API-call-per-line)"""
-        objects = [self.parse_issn_row(l)
-                   for l in batch if l != None]
-        objects = [o for o in objects if o != None]
-        self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup)
-        self.insert_count = self.insert_count + len(objects)
```