diff options
| -rw-r--r-- | python/fatcat/crossref_importer.py | 2 | ||||
| -rw-r--r-- | python/fatcat/importer_common.py | 9 | ||||
| -rw-r--r-- | python/fatcat/issn_importer.py | 2 | ||||
| -rw-r--r-- | python/fatcat/orcid_importer.py | 2 | ||||
| -rwxr-xr-x | python/fatcat_import.py | 22 | 
5 files changed, 37 insertions, 0 deletions
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index a2e14ed1..2154c8c0 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -160,6 +160,7 @@ class FatcatCrossrefImporter(FatcatImporter):
                 re.container_id = container.ident
                 self._issnl_id_map[ce.issnl] = container.ident
             self.api.create_release(re, editgroup=editgroup)
+            self.insert_count = self.insert_count + 1
 
     def create_batch(self, batch, editgroup=None):
         """Current work/release pairing disallows batch creation of releases.
@@ -178,3 +179,4 @@ class FatcatCrossrefImporter(FatcatImporter):
                     self._issnl_id_map[ce.issnl] = container.ident
                 release_batch.append(re)
         self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup)
+        self.insert_count = self.insert_count + len(release_batch)
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py
index ff0c8a27..95bec8a1 100644
--- a/python/fatcat/importer_common.py
+++ b/python/fatcat/importer_common.py
@@ -26,6 +26,13 @@ class FatcatImporter:
         self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")
         if issn_map_file:
             self.read_issn_map_file(issn_map_file)
+        self.processed_lines = 0
+        self.insert_count = 0
+        self.update_count = 0
+
+    def describe_run(self):
+        print("Processed {} lines, inserted {}, updated {}.".format(
+            self.processed_lines, self.insert_count, self.update_count))
 
     def process_source(self, source, group_size=100):
         """Creates and auto-accepts editgroup every group_size rows"""
@@ -37,12 +44,14 @@ class FatcatImporter:
                 self.api.accept_editgroup(eg)
                 eg = self.api.create_editgroup(
                     fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
+            self.processed_lines = self.processed_lines + 1
         if i == 0 or (i % group_size) != 0:
             self.api.accept_editgroup(eg.id)
 
     def process_batch(self, source, size=50):
         """Reads and processes in batches (not API-call-per-)"""
         for rows in grouper(source, size):
+            self.processed_lines = self.processed_lines + sum(1 for r in rows if r is not None)
             eg = self.api.create_editgroup(
                 fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
             self.create_batch(rows, editgroup=eg.id)
diff --git a/python/fatcat/issn_importer.py b/python/fatcat/issn_importer.py
index ad2cad78..c9ef50b5 100644
--- a/python/fatcat/issn_importer.py
+++ b/python/fatcat/issn_importer.py
@@ -61,6 +61,7 @@ class FatcatIssnImporter(FatcatImporter):
         ce = self.parse_issn_row(row)
         if ce is not None:
             self.api.create_container(ce, editgroup=editgroup)
+            self.insert_count = self.insert_count + 1
 
     def create_batch(self, batch, editgroup=None):
         """Reads and processes in batches (not API-call-per-line)"""
@@ -68,3 +69,4 @@ class FatcatIssnImporter(FatcatImporter):
                    for l in batch if l != None]
         objects = [o for o in objects if o != None]
         self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup)
+        self.insert_count = self.insert_count + len(objects)
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
index 2eeac122..e1f5943c 100644
--- a/python/fatcat/orcid_importer.py
+++ b/python/fatcat/orcid_importer.py
@@ -62,6 +62,7 @@ class FatcatOrcidImporter(FatcatImporter):
         ce = self.parse_orcid_dict(obj)
         if ce is not None:
             self.api.create_creator(ce, editgroup=editgroup)
+            self.insert_count = self.insert_count + 1
 
     def create_batch(self, batch, editgroup=None):
         """Reads and processes in batches (not API-call-per-line)"""
@@ -69,3 +70,4 @@ class FatcatOrcidImporter(FatcatImporter):
                    for l in batch if l != None]
         objects = [o for o in objects if o != None]
         self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup)
+        self.insert_count = self.insert_count + len(objects)
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 2f0c746f..525cf286 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -12,18 +12,28 @@ def run_import_crossref(args):
     fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file,
         args.extid_map_file, create_containers=(not args.no_create_containers))
     fci.process_batch(args.json_file, size=args.batch_size)
+    fci.describe_run()
 
 def run_import_orcid(args):
     foi = FatcatOrcidImporter(args.host_url)
     foi.process_batch(args.json_file, size=args.batch_size)
+    foi.describe_run()
 
 def run_import_issn(args):
     fii = FatcatIssnImporter(args.host_url)
     fii.process_csv_batch(args.csv_file, size=args.batch_size)
+    fii.describe_run()
 
 def run_import_manifest(args):
     fmi = FatcatManifestImporter(args.host_url)
     fmi.process_db(args.db_path, size=args.batch_size)
+    fmi.describe_run()
+
+def run_import_matched(args):
+    fmi = FatcatMatchedImporter(args.host_url,
+        skip_file_update=args.no_file_update)
+    fmi.process_batch(args.json_file, size=args.batch_size)
+    fmi.describe_run()
 
 def health(args):
     rfac = RawFatcatApiClient(args.host_url)
@@ -84,6 +94,18 @@ def main():
         help="size of batch to send",
         default=50, type=int)
 
+    sub_import_matched = subparsers.add_parser('import-matched')
+    sub_import_matched.set_defaults(func=run_import_matched)
+    sub_import_matched.add_argument('json_file',
+        help="JSON file to import from (or stdin)",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_import_matched.add_argument('--no-file-update',
+        action='store_true',
+        help="don't lookup existing files, just insert (only for bootstrap)")
+    sub_import_matched.add_argument('--batch-size',
+        help="size of batch to send",
+        default=50, type=int)
+
     sub_health = subparsers.add_parser('health')
     sub_health.set_defaults(func=health)
 
