#!/usr/bin/env python3

import argparse
import os
import sys

import raven

from fatcat_tools import authenticated_api
from fatcat_tools.importers import (
    ARABESQUE_MATCH_WHERE_CLAUSE,
    ArabesqueMatchImporter,
    ArxivRawImporter,
    Bs4XmlFileListPusher,
    Bs4XmlFilePusher,
    Bs4XmlLargeFilePusher,
    Bs4XmlLinesPusher,
    ChoculaImporter,
    CrossrefImporter,
    DataciteImporter,
    DblpContainerImporter,
    DblpReleaseImporter,
    DoajArticleImporter,
    FileMetaImporter,
    FilesetImporter,
    GrobidMetadataImporter,
    IngestFileResultImporter,
    IngestFilesetResultImporter,
    IngestWebResultImporter,
    JalcImporter,
    JournalMetadataImporter,
    JsonLinePusher,
    JstorImporter,
    KafkaBs4XmlPusher,
    KafkaJsonPusher,
    LinePusher,
    MatchedImporter,
    OrcidImporter,
    PubmedImporter,
    SavePaperNowFileImporter,
    SavePaperNowFilesetImporter,
    SavePaperNowWebImporter,
    ShadowLibraryImporter,
    SqlitePusher,
    auto_cdl_dash_dat,
    auto_wayback_static,
)

# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
sentry_client = raven.Client()


def run_crossref(args):
    """Run the Crossref import, reading from Kafka or from a JSON-lines file.

    Builds a ``CrossrefImporter`` from the parsed options and hands it to a
    pusher, which is then run to completion.

    Args:
        args: parsed command-line options (presumably an
            ``argparse.Namespace`` -- confirm against the caller). The
            attributes read here are ``api``, ``issn_map_file``,
            ``extid_map_file``, ``batch_size``, ``bezerk_mode`` and
            ``kafka_mode``; additionally ``kafka_hosts`` and ``kafka_env``
            when ``kafka_mode`` is truthy, or ``json_file`` otherwise.
    """
    fci = CrossrefImporter(
        args.api,
        args.issn_map_file,
        extid_map_file=args.extid_map_file,
        edit_batch_size=args.batch_size,
        bezerk_mode=args.bezerk_mode,
    )

    if args.kafka_mode:
        # The same batch size is used for Kafka consumption and for the
        # importer's edit batches above.
        pusher = KafkaJsonPusher(
            fci,
            args.kafka_hosts,
            args.kafka_env,
            "api-crossref",
            "fatcat-{}-import-crossref".format(args.kafka_env),
            consume_batch_size=args.batch_size,
        )
    else:
        pusher = JsonLinePusher(fci, args.json_file)
    pusher.run()


# NOTE(review): the next definition is cut off in the visible source (it stops
# inside a string literal). Its text is kept verbatim on the following line,
# commented out, so nothing is lost; rejoin it with its continuation and
# restore its line breaks before re-enabling it.
# def run_jalc(args): ji = JalcImporter(args.api, args.issn_map_file, extid_map_file=args.extid_map_file) Bs4XmlLinesPusher(ji, args.xml_file, "