Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x  python/fatcat_import.py  950
1 file changed, 591 insertions(+), 359 deletions(-)
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 3225688c..42001974 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -51,11 +51,13 @@ sentry_client = raven.Client()
def run_crossref(args):
- fci = CrossrefImporter(args.api,
+ fci = CrossrefImporter(
+ args.api,
args.issn_map_file,
extid_map_file=args.extid_map_file,
edit_batch_size=args.batch_size,
- bezerk_mode=args.bezerk_mode)
+ bezerk_mode=args.bezerk_mode,
+ )
if args.kafka_mode:
KafkaJsonPusher(
fci,
@@ -68,15 +70,14 @@ def run_crossref(args):
else:
JsonLinePusher(fci, args.json_file).run()
+
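
All of the run_* helpers in this file share one shape: construct an importer, then drive it with a pusher (JSON lines, XML, SQLite, or Kafka). Below is a minimal sketch of that contract, with hypothetical class names rather than the real fatcat_tools implementations:

import json
import sys
from typing import IO, Any, List


class SketchImporter:
    # buffers records and submits them in batches, mirroring edit_batch_size
    def __init__(self, edit_batch_size: int = 50) -> None:
        self.edit_batch_size = edit_batch_size
        self._batch: List[Any] = []

    def push_record(self, record: Any) -> None:
        self._batch.append(record)
        if len(self._batch) >= self.edit_batch_size:
            self.flush()

    def flush(self) -> None:
        # a real importer would open an editgroup and submit entities here
        print(f"submitting {len(self._batch)} records", file=sys.stderr)
        self._batch.clear()


class SketchJsonLinePusher:
    # reads one JSON object per line and feeds each to the importer
    def __init__(self, importer: SketchImporter, json_file: IO[str]) -> None:
        self.importer = importer
        self.json_file = json_file

    def run(self) -> None:
        for line in self.json_file:
            if line.strip():
                self.importer.push_record(json.loads(line))
        self.importer.flush()
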
def run_jalc(args):
- ji = JalcImporter(args.api,
- args.issn_map_file,
- extid_map_file=args.extid_map_file)
+ ji = JalcImporter(args.api, args.issn_map_file, extid_map_file=args.extid_map_file)
Bs4XmlLinesPusher(ji, args.xml_file, "<rdf:Description").run()
+
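
run_jalc drives a Bs4XmlLinesPusher with the "<rdf:Description" prefix, i.e. a record-per-line XML scan. A sketch of that reading, assuming beautifulsoup4 and lxml are installed; this illustrates the call above, not the fatcat_tools pusher itself:

from typing import IO, Any

from bs4 import BeautifulSoup  # requires beautifulsoup4 + lxml


def push_xml_lines(importer: Any, xml_file: IO[str], prefix: str) -> None:
    # only lines that start with the record prefix are parsed as records
    for line in xml_file:
        if line.startswith(prefix):
            importer.push_record(BeautifulSoup(line, "xml"))
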
def run_arxiv(args):
- ari = ArxivRawImporter(args.api,
- edit_batch_size=args.batch_size)
+ ari = ArxivRawImporter(args.api, edit_batch_size=args.batch_size)
if args.kafka_mode:
KafkaBs4XmlPusher(
ari,
@@ -87,15 +88,18 @@ def run_arxiv(args):
).run()
else:
if args.xml_file == sys.stdin:
- print('note: reading from stdin', file=sys.stderr)
+ print("note: reading from stdin", file=sys.stderr)
Bs4XmlFilePusher(ari, args.xml_file, "record").run()
+
def run_pubmed(args):
- pi = PubmedImporter(args.api,
+ pi = PubmedImporter(
+ args.api,
args.issn_map_file,
edit_batch_size=args.batch_size,
do_updates=args.do_updates,
- lookup_refs=(not args.no_lookup_refs))
+ lookup_refs=(not args.no_lookup_refs),
+ )
if args.kafka_mode:
KafkaBs4XmlPusher(
pi,
@@ -111,62 +115,67 @@ def run_pubmed(args):
["PubmedArticle"],
).run()
+
def run_jstor(args):
- ji = JstorImporter(args.api,
- args.issn_map_file,
- edit_batch_size=args.batch_size)
+ ji = JstorImporter(args.api, args.issn_map_file, edit_batch_size=args.batch_size)
Bs4XmlFileListPusher(ji, args.list_file, "article").run()
+
def run_orcid(args):
- foi = OrcidImporter(args.api,
- edit_batch_size=args.batch_size)
+ foi = OrcidImporter(args.api, edit_batch_size=args.batch_size)
JsonLinePusher(foi, args.json_file).run()
+
def run_journal_metadata(args):
- fii = JournalMetadataImporter(args.api,
- edit_batch_size=args.batch_size)
+ fii = JournalMetadataImporter(args.api, edit_batch_size=args.batch_size)
JsonLinePusher(fii, args.json_file).run()
+
def run_chocula(args):
- fii = ChoculaImporter(args.api,
- edit_batch_size=args.batch_size,
- do_updates=args.do_updates)
+ fii = ChoculaImporter(args.api, edit_batch_size=args.batch_size, do_updates=args.do_updates)
JsonLinePusher(fii, args.json_file).run()
+
def run_matched(args):
- fmi = MatchedImporter(args.api,
+ fmi = MatchedImporter(
+ args.api,
edit_batch_size=args.batch_size,
editgroup_description=args.editgroup_description_override,
default_link_rel=args.default_link_rel,
- default_mimetype=args.default_mimetype)
+ default_mimetype=args.default_mimetype,
+ )
JsonLinePusher(fmi, args.json_file).run()
+
def run_arabesque_match(args):
- if (args.sqlite_file and args.json_file) or not (args.sqlite_file or
- args.json_file):
+ if (args.sqlite_file and args.json_file) or not (args.sqlite_file or args.json_file):
print("Supply one of --sqlite-file or --json-file")
- ami = ArabesqueMatchImporter(args.api,
+ ami = ArabesqueMatchImporter(
+ args.api,
editgroup_description=args.editgroup_description_override,
do_updates=args.do_updates,
require_grobid=(not args.no_require_grobid),
extid_type=args.extid_type,
crawl_id=args.crawl_id,
default_link_rel=args.default_link_rel,
- edit_batch_size=args.batch_size)
+ edit_batch_size=args.batch_size,
+ )
if args.sqlite_file:
- SqlitePusher(ami, args.sqlite_file, "crawl_result",
- ARABESQUE_MATCH_WHERE_CLAUSE).run()
+ SqlitePusher(ami, args.sqlite_file, "crawl_result", ARABESQUE_MATCH_WHERE_CLAUSE).run()
elif args.json_file:
JsonLinePusher(ami, args.json_file).run()
+
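
run_arabesque_match accepts either JSON lines or a SQLite database; the SqlitePusher call above names a table ("crawl_result") and a WHERE-clause constant. A sketch of what that suggests, with the actual filter left as an assumption:

import sqlite3
from typing import Any


def push_sqlite_rows(importer: Any, db_path: str, table: str, where: str) -> None:
    # iterate matching rows and push each as a dict; table and filter here are
    # trusted constants, not user input, so string interpolation is tolerable
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    for row in conn.execute(f"SELECT * FROM {table} WHERE {where}"):
        importer.push_record(dict(row))
    conn.close()
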
def run_ingest_file(args):
- ifri = IngestFileResultImporter(args.api,
+ ifri = IngestFileResultImporter(
+ args.api,
editgroup_description=args.editgroup_description_override,
skip_source_allowlist=args.skip_source_allowlist,
do_updates=args.do_updates,
default_link_rel=args.default_link_rel,
require_grobid=(not args.no_require_grobid),
- edit_batch_size=args.batch_size)
+ edit_batch_size=args.batch_size,
+ )
if args.kafka_mode:
KafkaJsonPusher(
ifri,
@@ -180,13 +189,16 @@ def run_ingest_file(args):
else:
JsonLinePusher(ifri, args.json_file).run()
+
def run_ingest_web(args):
- iwri = IngestWebResultImporter(args.api,
+ iwri = IngestWebResultImporter(
+ args.api,
editgroup_description=args.editgroup_description_override,
skip_source_allowlist=args.skip_source_allowlist,
do_updates=args.do_updates,
default_link_rel=args.default_link_rel,
- edit_batch_size=args.batch_size)
+ edit_batch_size=args.batch_size,
+ )
if args.kafka_mode:
KafkaJsonPusher(
iwri,
@@ -201,13 +213,16 @@ def run_ingest_web(args):
else:
JsonLinePusher(iwri, args.json_file).run()
+
def run_ingest_fileset(args):
- ifri = IngestFilesetResultImporter(args.api,
+ ifri = IngestFilesetResultImporter(
+ args.api,
editgroup_description=args.editgroup_description_override,
skip_source_allowlist=args.skip_source_allowlist,
do_updates=args.do_updates,
default_link_rel=args.default_link_rel,
- edit_batch_size=args.batch_size)
+ edit_batch_size=args.batch_size,
+ )
if args.kafka_mode:
KafkaJsonPusher(
ifri,
@@ -222,10 +237,13 @@ def run_ingest_fileset(args):
else:
JsonLinePusher(ifri, args.json_file).run()
+
def run_savepapernow_file(args):
- ifri = SavePaperNowFileImporter(args.api,
+ ifri = SavePaperNowFileImporter(
+ args.api,
editgroup_description=args.editgroup_description_override,
- edit_batch_size=args.batch_size)
+ edit_batch_size=args.batch_size,
+ )
if args.kafka_mode:
KafkaJsonPusher(
ifri,
@@ -240,10 +258,13 @@ def run_savepapernow_file(args):
else:
JsonLinePusher(ifri, args.json_file).run()
+
def run_savepapernow_web(args):
- ifri = SavePaperNowWebImporter(args.api,
+ ifri = SavePaperNowWebImporter(
+ args.api,
editgroup_description=args.editgroup_description_override,
- edit_batch_size=args.batch_size)
+ edit_batch_size=args.batch_size,
+ )
if args.kafka_mode:
KafkaJsonPusher(
ifri,
@@ -258,10 +279,13 @@ def run_savepapernow_web(args):
else:
JsonLinePusher(ifri, args.json_file).run()
+
def run_savepapernow_fileset(args):
- ifri = SavePaperNowFilesetImporter(args.api,
+ ifri = SavePaperNowFilesetImporter(
+ args.api,
editgroup_description=args.editgroup_description_override,
- edit_batch_size=args.batch_size)
+ edit_batch_size=args.batch_size,
+ )
if args.kafka_mode:
KafkaJsonPusher(
ifri,
@@ -276,18 +300,22 @@ def run_savepapernow_fileset(args):
else:
JsonLinePusher(ifri, args.json_file).run()
+
def run_grobid_metadata(args):
- fmi = GrobidMetadataImporter(args.api,
+ fmi = GrobidMetadataImporter(
+ args.api,
edit_batch_size=args.batch_size,
longtail_oa=args.longtail_oa,
- bezerk_mode=args.bezerk_mode)
+ bezerk_mode=args.bezerk_mode,
+ )
LinePusher(fmi, args.tsv_file).run()
+
def run_shadow_lib(args):
- fmi = ShadowLibraryImporter(args.api,
- edit_batch_size=100)
+ fmi = ShadowLibraryImporter(args.api, edit_batch_size=100)
JsonLinePusher(fmi, args.json_file).run()
+
def run_wayback_static(args):
api = args.api
@@ -295,8 +323,8 @@ def run_wayback_static(args):
if args.release_id:
release_id = args.release_id
elif args.extid:
- idtype = args.extid.split(':')[0]
- extid = ':'.join(args.extid.split(':')[1:])
+ idtype = args.extid.split(":")[0]
+ extid = ":".join(args.extid.split(":")[1:])
if idtype == "doi":
release_id = api.lookup_release(doi=extid).ident
elif idtype == "pmid":
@@ -309,8 +337,9 @@ def run_wayback_static(args):
raise Exception("need either release_id or extid argument")
# create it
- (editgroup_id, wc) = auto_wayback_static(api, release_id, args.wayback_url,
- editgroup_id=args.editgroup_id)
+ (editgroup_id, wc) = auto_wayback_static(
+ api, release_id, args.wayback_url, editgroup_id=args.editgroup_id
+ )
if not wc:
return
print("release_id: {}".format(release_id))
@@ -318,12 +347,14 @@ def run_wayback_static(args):
print("webcapture id: {}".format(wc.ident))
print("link: https://fatcat.wiki/webcapture/{}".format(wc.ident))
+
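
The extid parsing in run_wayback_static splits only on the first ":", so identifier suffixes containing ":" survive intact, e.g.:

extid_arg = "doi:10.1000/abc:def"          # illustrative value
idtype = extid_arg.split(":")[0]            # "doi"
extid = ":".join(extid_arg.split(":")[1:])  # "10.1000/abc:def"
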
def run_cdl_dash_dat(args):
api = args.api
# create it
- (editgroup_id, release, fs) = auto_cdl_dash_dat(api, args.dat_path,
- release_id=args.release_id, editgroup_id=args.editgroup_id)
+ (editgroup_id, release, fs) = auto_cdl_dash_dat(
+ api, args.dat_path, release_id=args.release_id, editgroup_id=args.editgroup_id
+ )
if not fs:
return
print("release_id: {}".format(release.ident))
@@ -331,14 +362,17 @@ def run_cdl_dash_dat(args):
print("fileset id: {}".format(fs.ident))
print("link: https://fatcat.wiki/fileset/{}".format(fs.ident))
+
def run_datacite(args):
- dci = DataciteImporter(args.api,
+ dci = DataciteImporter(
+ args.api,
args.issn_map_file,
edit_batch_size=args.batch_size,
bezerk_mode=args.bezerk_mode,
debug=args.debug,
extid_map_file=args.extid_map_file,
- insert_log_file=args.insert_log_file)
+ insert_log_file=args.insert_log_file,
+ )
if args.kafka_mode:
KafkaJsonPusher(
dci,
@@ -351,8 +385,10 @@ def run_datacite(args):
else:
JsonLinePusher(dci, args.json_file).run()
+
def run_doaj_article(args):
- dai = DoajArticleImporter(args.api,
+ dai = DoajArticleImporter(
+ args.api,
args.issn_map_file,
edit_batch_size=args.batch_size,
do_updates=args.do_updates,
@@ -369,8 +405,10 @@ def run_doaj_article(args):
else:
JsonLinePusher(dai, args.json_file).run()
+
def run_dblp_release(args):
- dri = DblpReleaseImporter(args.api,
+ dri = DblpReleaseImporter(
+ args.api,
dblp_container_map_file=args.dblp_container_map_file,
edit_batch_size=args.batch_size,
do_updates=args.do_updates,
@@ -383,8 +421,10 @@ def run_dblp_release(args):
use_lxml=True,
).run()
+
def run_dblp_container(args):
- dci = DblpContainerImporter(args.api,
+ dci = DblpContainerImporter(
+ args.api,
args.issn_map_file,
dblp_container_map_file=args.dblp_container_map_file,
dblp_container_map_output=args.dblp_container_map_output,
@@ -393,14 +433,17 @@ def run_dblp_container(args):
)
JsonLinePusher(dci, args.json_file).run()
+
def run_file_meta(args):
# do_updates defaults to true for this importer
- fmi = FileMetaImporter(args.api,
+ fmi = FileMetaImporter(
+ args.api,
edit_batch_size=100,
editgroup_description=args.editgroup_description_override,
)
JsonLinePusher(fmi, args.json_file).run()
+
def run_fileset(args):
fmi = FilesetImporter(
args.api,
@@ -409,478 +452,664 @@ def run_fileset(args):
)
JsonLinePusher(fmi, args.json_file).run()
+
def main():
- parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser.add_argument('--host-url',
- default="http://localhost:9411/v0",
- help="connect to this host/port")
- parser.add_argument('--kafka-hosts',
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument(
+ "--host-url", default="http://localhost:9411/v0", help="connect to this host/port"
+ )
+ parser.add_argument(
+ "--kafka-hosts",
default="localhost:9092",
- help="list of Kafka brokers (host/port) to use")
- parser.add_argument('--kafka-env',
- default="dev",
- help="Kafka topic namespace to use (eg, prod, qa)")
- parser.add_argument('--batch-size',
- help="size of batch to send",
- default=50, type=int)
- parser.add_argument('--editgroup-description-override',
+ help="list of Kafka brokers (host/port) to use",
+ )
+ parser.add_argument(
+ "--kafka-env", default="dev", help="Kafka topic namespace to use (eg, prod, qa)"
+ )
+ parser.add_argument("--batch-size", help="size of batch to send", default=50, type=int)
+ parser.add_argument(
+ "--editgroup-description-override",
help="editgroup description override",
- default=None, type=str)
+ default=None,
+ type=str,
+ )
subparsers = parser.add_subparsers()
- sub_crossref = subparsers.add_parser('crossref',
- help="import Crossref API metadata format (JSON)")
+ sub_crossref = subparsers.add_parser(
+ "crossref", help="import Crossref API metadata format (JSON)"
+ )
sub_crossref.set_defaults(
func=run_crossref,
auth_var="FATCAT_AUTH_WORKER_CROSSREF",
)
- sub_crossref.add_argument('json_file',
+ sub_crossref.add_argument(
+ "json_file",
help="crossref JSON file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_crossref.add_argument('issn_map_file',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_crossref.add_argument(
+ "issn_map_file",
help="ISSN to ISSN-L mapping file",
- default=None, type=argparse.FileType('r'))
- sub_crossref.add_argument('--extid-map-file',
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_crossref.add_argument(
+ "--extid-map-file",
help="DOI-to-other-identifiers sqlite3 database",
- default=None, type=str)
- sub_crossref.add_argument('--no-lookup-refs',
- action='store_true',
- help="skip lookup of references (PMID or DOI)")
- sub_crossref.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
- sub_crossref.add_argument('--bezerk-mode',
- action='store_true',
- help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)")
-
- sub_jalc = subparsers.add_parser('jalc',
- help="import JALC DOI metadata from XML dump")
+ default=None,
+ type=str,
+ )
+ sub_crossref.add_argument(
+ "--no-lookup-refs", action="store_true", help="skip lookup of references (PMID or DOI)"
+ )
+ sub_crossref.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+ sub_crossref.add_argument(
+ "--bezerk-mode",
+ action="store_true",
+ help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)",
+ )
+
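
The sub_crossref block above shows the stdin-default idiom used throughout main(): positional file arguments carry default=sys.stdin and type=argparse.FileType("r"). Note that a positional's default only takes effect when nargs="?" is also given (as the arxiv and pubmed subcommands do); a small self-contained demonstration:

import argparse
import sys

parser = argparse.ArgumentParser()
parser.add_argument(
    "json_file",
    nargs="?",  # without this, the positional is required and the default is inert
    default=sys.stdin,
    type=argparse.FileType("r"),
    help="input file (or stdin if omitted)",
)
args = parser.parse_args([])
assert args.json_file is sys.stdin  # FileType is not applied to a non-string default
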
+ sub_jalc = subparsers.add_parser("jalc", help="import JALC DOI metadata from XML dump")
sub_jalc.set_defaults(
func=run_jalc,
auth_var="FATCAT_AUTH_WORKER_JALC",
)
- sub_jalc.add_argument('xml_file',
+ sub_jalc.add_argument(
+ "xml_file",
help="Jalc RDF XML file (record-per-line) to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_jalc.add_argument('issn_map_file',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_jalc.add_argument(
+ "issn_map_file",
help="ISSN to ISSN-L mapping file",
- default=None, type=argparse.FileType('r'))
- sub_jalc.add_argument('--extid-map-file',
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_jalc.add_argument(
+ "--extid-map-file",
help="DOI-to-other-identifiers sqlite3 database",
- default=None, type=str)
+ default=None,
+ type=str,
+ )
- sub_arxiv = subparsers.add_parser('arxiv',
- help="import arxiv.org metadata from XML files")
+ sub_arxiv = subparsers.add_parser("arxiv", help="import arxiv.org metadata from XML files")
sub_arxiv.set_defaults(
func=run_arxiv,
auth_var="FATCAT_AUTH_WORKER_ARXIV",
)
- sub_arxiv.add_argument('xml_file',
- nargs='?',
+ sub_arxiv.add_argument(
+ "xml_file",
+ nargs="?",
help="arXivRaw XML file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_arxiv.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_arxiv.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
- sub_pubmed = subparsers.add_parser('pubmed',
- help="import MEDLINE/PubMed work-level metadata (XML)")
+ sub_pubmed = subparsers.add_parser(
+ "pubmed", help="import MEDLINE/PubMed work-level metadata (XML)"
+ )
sub_pubmed.set_defaults(
func=run_pubmed,
auth_var="FATCAT_AUTH_WORKER_PUBMED",
)
- sub_pubmed.add_argument('xml_file',
- nargs='?',
+ sub_pubmed.add_argument(
+ "xml_file",
+ nargs="?",
help="Pubmed XML file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_pubmed.add_argument('issn_map_file',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_pubmed.add_argument(
+ "issn_map_file",
help="ISSN to ISSN-L mapping file",
- default=None, type=argparse.FileType('r'))
- sub_pubmed.add_argument('--no-lookup-refs',
- action='store_true',
- help="skip lookup of references (PMID or DOI)")
- sub_pubmed.add_argument('--do-updates',
- action='store_true',
- help="update pre-existing release entities")
- sub_pubmed.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
-
- sub_jstor = subparsers.add_parser('jstor',
- help="import JSTOR work-level metadata from XML dump")
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_pubmed.add_argument(
+ "--no-lookup-refs", action="store_true", help="skip lookup of references (PMID or DOI)"
+ )
+ sub_pubmed.add_argument(
+ "--do-updates", action="store_true", help="update pre-existing release entities"
+ )
+ sub_pubmed.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+
+ sub_jstor = subparsers.add_parser(
+ "jstor", help="import JSTOR work-level metadata from XML dump"
+ )
sub_jstor.set_defaults(
func=run_jstor,
auth_var="FATCAT_AUTH_WORKER_JSTOR",
)
- sub_jstor.add_argument('list_file',
+ sub_jstor.add_argument(
+ "list_file",
help="List of JSTOR XML file paths to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_jstor.add_argument('issn_map_file',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_jstor.add_argument(
+ "issn_map_file",
help="ISSN to ISSN-L mapping file",
- default=None, type=argparse.FileType('r'))
+ default=None,
+ type=argparse.FileType("r"),
+ )
- sub_orcid = subparsers.add_parser('orcid',
- help="import creator entities from ORCID XML dump")
- sub_orcid.set_defaults(
- func=run_orcid,
- auth_var="FATCAT_AUTH_WORKER_ORCID"
+ sub_orcid = subparsers.add_parser(
+ "orcid", help="import creator entities from ORCID XML dump"
)
- sub_orcid.add_argument('json_file',
+ sub_orcid.set_defaults(func=run_orcid, auth_var="FATCAT_AUTH_WORKER_ORCID")
+ sub_orcid.add_argument(
+ "json_file",
help="orcid JSON file to import from (or stdin)",
- default=sys.stdin, type=argparse.FileType('r'))
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
- sub_journal_metadata = subparsers.add_parser('journal-metadata',
- help="import/update container metadata from old manual munging format")
+ sub_journal_metadata = subparsers.add_parser(
+ "journal-metadata",
+ help="import/update container metadata from old manual munging format",
+ )
sub_journal_metadata.set_defaults(
func=run_journal_metadata,
auth_var="FATCAT_AUTH_WORKER_JOURNAL_METADATA",
)
- sub_journal_metadata.add_argument('json_file',
+ sub_journal_metadata.add_argument(
+ "json_file",
help="Journal JSON metadata file to import from (or stdin)",
- default=sys.stdin, type=argparse.FileType('r'))
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
- sub_chocula = subparsers.add_parser('chocula',
- help="import/update container metadata from chocula JSON export")
+ sub_chocula = subparsers.add_parser(
+ "chocula", help="import/update container metadata from chocula JSON export"
+ )
sub_chocula.set_defaults(
func=run_chocula,
auth_var="FATCAT_AUTH_WORKER_JOURNAL_METADATA",
)
- sub_chocula.add_argument('json_file',
+ sub_chocula.add_argument(
+ "json_file",
help="chocula JSON entities file (or stdin)",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_chocula.add_argument('--do-updates',
- action='store_true',
- help="update pre-existing container entities")
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_chocula.add_argument(
+ "--do-updates", action="store_true", help="update pre-existing container entities"
+ )
- sub_matched = subparsers.add_parser('matched',
- help="add file entities matched against existing releases; custom JSON format")
+ sub_matched = subparsers.add_parser(
+ "matched",
+ help="add file entities matched against existing releases; custom JSON format",
+ )
sub_matched.set_defaults(
func=run_matched,
auth_var="FATCAT_API_AUTH_TOKEN",
)
- sub_matched.add_argument('json_file',
+ sub_matched.add_argument(
+ "json_file",
help="JSON file to import from (or stdin)",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_matched.add_argument('--default-mimetype',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_matched.add_argument(
+ "--default-mimetype",
default=None,
- help="default mimetype for imported files (if not specified per-file)")
- sub_matched.add_argument('--bezerk-mode',
- action='store_true',
- help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")
- sub_matched.add_argument('--default-link-rel',
+ help="default mimetype for imported files (if not specified per-file)",
+ )
+ sub_matched.add_argument(
+ "--bezerk-mode",
+ action="store_true",
+ help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)",
+ )
+ sub_matched.add_argument(
+ "--default-link-rel",
default="web",
- help="default URL rel for matches (eg, 'publisher', 'web')")
+ help="default URL rel for matches (eg, 'publisher', 'web')",
+ )
- sub_arabesque_match = subparsers.add_parser('arabesque',
- help="add file entities matched to releases from crawl log analysis")
+ sub_arabesque_match = subparsers.add_parser(
+ "arabesque", help="add file entities matched to releases from crawl log analysis"
+ )
sub_arabesque_match.set_defaults(
func=run_arabesque_match,
auth_var="FATCAT_AUTH_WORKER_CRAWL",
)
- sub_arabesque_match.add_argument('--sqlite-file',
- help="sqlite database file to import from")
- sub_arabesque_match.add_argument('--json-file',
- help="JSON file to import from (or stdin)",
- type=argparse.FileType('r'))
- sub_arabesque_match.add_argument('--do-updates',
- action='store_true',
- help="update pre-existing file entities if new match (instead of skipping)")
- sub_arabesque_match.add_argument('--no-require-grobid',
- action='store_true',
- help="whether postproc_status column must be '200'")
- sub_arabesque_match.add_argument('--extid-type',
+ sub_arabesque_match.add_argument(
+ "--sqlite-file", help="sqlite database file to import from"
+ )
+ sub_arabesque_match.add_argument(
+ "--json-file", help="JSON file to import from (or stdin)", type=argparse.FileType("r")
+ )
+ sub_arabesque_match.add_argument(
+ "--do-updates",
+ action="store_true",
+ help="update pre-existing file entities if new match (instead of skipping)",
+ )
+ sub_arabesque_match.add_argument(
+ "--no-require-grobid",
+ action="store_true",
+ help="whether postproc_status column must be '200'",
+ )
+ sub_arabesque_match.add_argument(
+ "--extid-type",
default="doi",
- help="identifier type in the database (eg, 'doi', 'pmcid'")
- sub_arabesque_match.add_argument('--crawl-id',
- help="crawl ID (optionally included in editgroup metadata)")
- sub_arabesque_match.add_argument('--default-link-rel',
+        help="identifier type in the database (eg, 'doi', 'pmcid')",
+ )
+ sub_arabesque_match.add_argument(
+ "--crawl-id", help="crawl ID (optionally included in editgroup metadata)"
+ )
+ sub_arabesque_match.add_argument(
+ "--default-link-rel",
default="web",
- help="default URL rel for matches (eg, 'publisher', 'web')")
+ help="default URL rel for matches (eg, 'publisher', 'web')",
+ )
- sub_ingest_file = subparsers.add_parser('ingest-file-results',
- help="add/update file entities linked to releases based on sandcrawler ingest results")
+ sub_ingest_file = subparsers.add_parser(
+ "ingest-file-results",
+ help="add/update file entities linked to releases based on sandcrawler ingest results",
+ )
sub_ingest_file.set_defaults(
func=run_ingest_file,
auth_var="FATCAT_AUTH_WORKER_CRAWL",
)
- sub_ingest_file.add_argument('json_file',
+ sub_ingest_file.add_argument(
+ "json_file",
help="ingest_file JSON file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_ingest_file.add_argument('--skip-source-allowlist',
- action='store_true',
- help="don't filter import based on request source allowlist")
- sub_ingest_file.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
- sub_ingest_file.add_argument('--do-updates',
- action='store_true',
- help="update pre-existing file entities if new match (instead of skipping)")
- sub_ingest_file.add_argument('--no-require-grobid',
- action='store_true',
- help="whether postproc_status column must be '200'")
- sub_ingest_file.add_argument('--default-link-rel',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_ingest_file.add_argument(
+ "--skip-source-allowlist",
+ action="store_true",
+ help="don't filter import based on request source allowlist",
+ )
+ sub_ingest_file.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+ sub_ingest_file.add_argument(
+ "--do-updates",
+ action="store_true",
+ help="update pre-existing file entities if new match (instead of skipping)",
+ )
+ sub_ingest_file.add_argument(
+ "--no-require-grobid",
+ action="store_true",
+ help="whether postproc_status column must be '200'",
+ )
+ sub_ingest_file.add_argument(
+ "--default-link-rel",
default="web",
- help="default URL rel for matches (eg, 'publisher', 'web')")
+ help="default URL rel for matches (eg, 'publisher', 'web')",
+ )
- sub_ingest_web = subparsers.add_parser('ingest-web-results',
- help="add/update web entities linked to releases based on sandcrawler ingest results")
+ sub_ingest_web = subparsers.add_parser(
+ "ingest-web-results",
+ help="add/update web entities linked to releases based on sandcrawler ingest results",
+ )
sub_ingest_web.set_defaults(
func=run_ingest_web,
auth_var="FATCAT_AUTH_WORKER_CRAWL",
)
- sub_ingest_web.add_argument('json_file',
+ sub_ingest_web.add_argument(
+ "json_file",
help="ingest_web JSON file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_ingest_web.add_argument('--skip-source-allowlist',
- action='store_true',
- help="don't filter import based on request source allowlist")
- sub_ingest_web.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
- sub_ingest_web.add_argument('--do-updates',
- action='store_true',
- help="update pre-existing web entities if new match (instead of skipping)")
- sub_ingest_web.add_argument('--default-link-rel',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_ingest_web.add_argument(
+ "--skip-source-allowlist",
+ action="store_true",
+ help="don't filter import based on request source allowlist",
+ )
+ sub_ingest_web.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+ sub_ingest_web.add_argument(
+ "--do-updates",
+ action="store_true",
+ help="update pre-existing web entities if new match (instead of skipping)",
+ )
+ sub_ingest_web.add_argument(
+ "--default-link-rel",
default="web",
- help="default URL rel for matches (eg, 'publisher', 'web')")
+ help="default URL rel for matches (eg, 'publisher', 'web')",
+ )
- sub_ingest_fileset = subparsers.add_parser('ingest-fileset-results',
- help="add/update fileset entities linked to releases based on sandcrawler ingest results")
+ sub_ingest_fileset = subparsers.add_parser(
+ "ingest-fileset-results",
+ help="add/update fileset entities linked to releases based on sandcrawler ingest results",
+ )
sub_ingest_fileset.set_defaults(
func=run_ingest_fileset,
auth_var="FATCAT_AUTH_WORKER_CRAWL",
)
- sub_ingest_fileset.add_argument('json_file',
+ sub_ingest_fileset.add_argument(
+ "json_file",
help="ingest_fileset JSON file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_ingest_fileset.add_argument('--skip-source-allowlist',
- action='store_true',
- help="don't filter import based on request source allowlist")
- sub_ingest_fileset.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
- sub_ingest_fileset.add_argument('--do-updates',
- action='store_true',
- help="update pre-existing fileset entities if new match (instead of skipping)")
- sub_ingest_fileset.add_argument('--default-link-rel',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_ingest_fileset.add_argument(
+ "--skip-source-allowlist",
+ action="store_true",
+ help="don't filter import based on request source allowlist",
+ )
+ sub_ingest_fileset.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+ sub_ingest_fileset.add_argument(
+ "--do-updates",
+ action="store_true",
+ help="update pre-existing fileset entities if new match (instead of skipping)",
+ )
+ sub_ingest_fileset.add_argument(
+ "--default-link-rel",
default="fileset",
- help="default URL rel for matches (eg, 'publisher', 'web')")
+ help="default URL rel for matches (eg, 'publisher', 'web')",
+ )
- sub_savepapernow_file = subparsers.add_parser('savepapernow-file-results',
- help="add file entities crawled due to async Save Paper Now request")
+ sub_savepapernow_file = subparsers.add_parser(
+ "savepapernow-file-results",
+ help="add file entities crawled due to async Save Paper Now request",
+ )
sub_savepapernow_file.set_defaults(
func=run_savepapernow_file,
auth_var="FATCAT_AUTH_WORKER_SAVEPAPERNOW",
)
- sub_savepapernow_file.add_argument('json_file',
+ sub_savepapernow_file.add_argument(
+ "json_file",
help="ingest-file JSON file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_savepapernow_file.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_savepapernow_file.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
- sub_savepapernow_web = subparsers.add_parser('savepapernow-web-results',
- help="add webcapture entities crawled due to async Save Paper Now request")
+ sub_savepapernow_web = subparsers.add_parser(
+ "savepapernow-web-results",
+ help="add webcapture entities crawled due to async Save Paper Now request",
+ )
sub_savepapernow_web.set_defaults(
func=run_savepapernow_web,
auth_var="FATCAT_AUTH_WORKER_SAVEPAPERNOW",
)
- sub_savepapernow_web.add_argument('json_file',
+ sub_savepapernow_web.add_argument(
+ "json_file",
help="ingest-file JSON file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_savepapernow_web.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_savepapernow_web.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
- sub_savepapernow_fileset = subparsers.add_parser('savepapernow-fileset-results',
- help="add fileset entities crawled due to async Save Paper Now request")
+ sub_savepapernow_fileset = subparsers.add_parser(
+ "savepapernow-fileset-results",
+ help="add fileset entities crawled due to async Save Paper Now request",
+ )
sub_savepapernow_fileset.set_defaults(
func=run_savepapernow_fileset,
auth_var="FATCAT_AUTH_WORKER_SAVEPAPERNOW",
)
- sub_savepapernow_fileset.add_argument('json_file',
+ sub_savepapernow_fileset.add_argument(
+ "json_file",
help="ingest-file JSON file to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_savepapernow_fileset.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_savepapernow_fileset.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
- sub_grobid_metadata = subparsers.add_parser('grobid-metadata',
- help="create release and file entities based on GROBID PDF metadata extraction")
+ sub_grobid_metadata = subparsers.add_parser(
+ "grobid-metadata",
+ help="create release and file entities based on GROBID PDF metadata extraction",
+ )
sub_grobid_metadata.set_defaults(
func=run_grobid_metadata,
auth_var="FATCAT_API_AUTH_TOKEN",
)
- sub_grobid_metadata.add_argument('tsv_file',
+ sub_grobid_metadata.add_argument(
+ "tsv_file",
help="TSV file to import from (or stdin)",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_grobid_metadata.add_argument('--group-size',
- help="editgroup group size to use",
- default=75, type=int)
- sub_grobid_metadata.add_argument('--longtail-oa',
- action='store_true',
- help="if this is an import of longtail OA content (sets an 'extra' flag)")
- sub_grobid_metadata.add_argument('--bezerk-mode',
- action='store_true',
- help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")
-
- sub_shadow_lib = subparsers.add_parser('shadow-lib',
- help="create release and file entities based on GROBID PDF metadata extraction")
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_grobid_metadata.add_argument(
+ "--group-size", help="editgroup group size to use", default=75, type=int
+ )
+ sub_grobid_metadata.add_argument(
+ "--longtail-oa",
+ action="store_true",
+ help="if this is an import of longtail OA content (sets an 'extra' flag)",
+ )
+ sub_grobid_metadata.add_argument(
+ "--bezerk-mode",
+ action="store_true",
+ help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)",
+ )
+
+ sub_shadow_lib = subparsers.add_parser(
+ "shadow-lib",
+ help="create release and file entities based on GROBID PDF metadata extraction",
+ )
sub_shadow_lib.set_defaults(
func=run_shadow_lib,
auth_var="FATCAT_AUTH_WORKER_SHADOW",
)
- sub_shadow_lib.add_argument('json_file',
+ sub_shadow_lib.add_argument(
+ "json_file",
help="JSON file to import from (or stdin)",
- default=sys.stdin, type=argparse.FileType('r'))
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
- sub_wayback_static = subparsers.add_parser('wayback-static',
- help="crude crawl+ingest tool for single-page HTML docs from wayback")
+ sub_wayback_static = subparsers.add_parser(
+ "wayback-static", help="crude crawl+ingest tool for single-page HTML docs from wayback"
+ )
sub_wayback_static.set_defaults(
func=run_wayback_static,
auth_var="FATCAT_API_AUTH_TOKEN",
)
- sub_wayback_static.add_argument('wayback_url',
- type=str,
- help="URL of wayback capture to extract from")
- sub_wayback_static.add_argument('--extid',
- type=str,
- help="external identifier for release lookup")
- sub_wayback_static.add_argument('--release-id',
- type=str,
- help="release entity identifier")
- sub_wayback_static.add_argument('--editgroup-id',
+ sub_wayback_static.add_argument(
+ "wayback_url", type=str, help="URL of wayback capture to extract from"
+ )
+ sub_wayback_static.add_argument(
+ "--extid", type=str, help="external identifier for release lookup"
+ )
+ sub_wayback_static.add_argument("--release-id", type=str, help="release entity identifier")
+ sub_wayback_static.add_argument(
+ "--editgroup-id",
type=str,
- help="use existing editgroup (instead of creating a new one)")
+ help="use existing editgroup (instead of creating a new one)",
+ )
- sub_cdl_dash_dat = subparsers.add_parser('cdl-dash-dat',
- help="crude helper to import datasets from Dat/CDL mirror pilot project")
+ sub_cdl_dash_dat = subparsers.add_parser(
+ "cdl-dash-dat", help="crude helper to import datasets from Dat/CDL mirror pilot project"
+ )
sub_cdl_dash_dat.set_defaults(
func=run_cdl_dash_dat,
auth_var="FATCAT_API_AUTH_TOKEN",
)
- sub_cdl_dash_dat.add_argument('dat_path',
- type=str,
- help="local path dat to import (must be the dat discovery key)")
- sub_cdl_dash_dat.add_argument('--release-id',
- type=str,
- help="release entity identifier")
- sub_cdl_dash_dat.add_argument('--editgroup-id',
+ sub_cdl_dash_dat.add_argument(
+ "dat_path", type=str, help="local path dat to import (must be the dat discovery key)"
+ )
+ sub_cdl_dash_dat.add_argument("--release-id", type=str, help="release entity identifier")
+ sub_cdl_dash_dat.add_argument(
+ "--editgroup-id",
type=str,
- help="use existing editgroup (instead of creating a new one)")
+ help="use existing editgroup (instead of creating a new one)",
+ )
- sub_datacite = subparsers.add_parser('datacite',
- help="import datacite.org metadata")
- sub_datacite.add_argument('json_file',
+ sub_datacite = subparsers.add_parser("datacite", help="import datacite.org metadata")
+ sub_datacite.add_argument(
+ "json_file",
help="File with jsonlines from datacite.org v2 API to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_datacite.add_argument('issn_map_file',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_datacite.add_argument(
+ "issn_map_file",
help="ISSN to ISSN-L mapping file",
- default=None, type=argparse.FileType('r'))
- sub_datacite.add_argument('--extid-map-file',
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_datacite.add_argument(
+ "--extid-map-file",
help="DOI-to-other-identifiers sqlite3 database",
- default=None, type=str)
- sub_datacite.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
- sub_datacite.add_argument('--bezerk-mode',
- action='store_true',
- help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)")
- sub_datacite.add_argument('--debug',
- action='store_true',
- help="write converted JSON to stdout")
- sub_datacite.add_argument('--insert-log-file',
- default='',
+ default=None,
type=str,
- help="write inserted documents into file (for debugging)")
+ )
+ sub_datacite.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+ sub_datacite.add_argument(
+ "--bezerk-mode",
+ action="store_true",
+ help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)",
+ )
+ sub_datacite.add_argument(
+ "--debug", action="store_true", help="write converted JSON to stdout"
+ )
+ sub_datacite.add_argument(
+ "--insert-log-file",
+ default="",
+ type=str,
+ help="write inserted documents into file (for debugging)",
+ )
sub_datacite.set_defaults(
func=run_datacite,
auth_var="FATCAT_AUTH_WORKER_DATACITE",
)
- sub_doaj_article = subparsers.add_parser('doaj-article',
- help="import doaj.org article metadata")
- sub_doaj_article.add_argument('json_file',
+ sub_doaj_article = subparsers.add_parser(
+ "doaj-article", help="import doaj.org article metadata"
+ )
+ sub_doaj_article.add_argument(
+ "json_file",
help="File with JSON lines from DOAJ API (or bulk dump) to import from",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_doaj_article.add_argument('--issn-map-file',
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_doaj_article.add_argument(
+ "--issn-map-file",
help="ISSN to ISSN-L mapping file",
- default=None, type=argparse.FileType('r'))
- sub_doaj_article.add_argument('--kafka-mode',
- action='store_true',
- help="consume from kafka topic (not stdin)")
- sub_doaj_article.add_argument('--do-updates',
- action='store_true',
- help="update any pre-existing release entities")
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_doaj_article.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+ sub_doaj_article.add_argument(
+ "--do-updates", action="store_true", help="update any pre-existing release entities"
+ )
sub_doaj_article.set_defaults(
func=run_doaj_article,
auth_var="FATCAT_AUTH_WORKER_DOAJ",
)
- sub_dblp_release = subparsers.add_parser('dblp-release',
- help="import dblp release metadata")
- sub_dblp_release.add_argument('xml_file',
+ sub_dblp_release = subparsers.add_parser(
+ "dblp-release", help="import dblp release metadata"
+ )
+ sub_dblp_release.add_argument(
+ "xml_file",
help="File with DBLP XML to import from",
- default=sys.stdin, type=argparse.FileType('rb'))
- sub_dblp_release.add_argument('--dblp-container-map-file',
+ default=sys.stdin,
+ type=argparse.FileType("rb"),
+ )
+ sub_dblp_release.add_argument(
+ "--dblp-container-map-file",
help="file path to dblp prefix to container_id TSV file",
- default=None, type=argparse.FileType('r'))
- sub_dblp_release.add_argument('--do-updates',
- action='store_true',
- help="update any pre-existing release entities")
- sub_dblp_release.add_argument('--dump-json-mode',
- action='store_true',
- help="print release entities to stdout instead of importing")
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_dblp_release.add_argument(
+ "--do-updates", action="store_true", help="update any pre-existing release entities"
+ )
+ sub_dblp_release.add_argument(
+ "--dump-json-mode",
+ action="store_true",
+ help="print release entities to stdout instead of importing",
+ )
sub_dblp_release.set_defaults(
func=run_dblp_release,
auth_var="FATCAT_AUTH_WORKER_DBLP",
)
- sub_dblp_container = subparsers.add_parser('dblp-container',
- help="import dblp container metadata")
- sub_dblp_container.add_argument('json_file',
+ sub_dblp_container = subparsers.add_parser(
+ "dblp-container", help="import dblp container metadata"
+ )
+ sub_dblp_container.add_argument(
+ "json_file",
help="File with DBLP container JSON to import from (see extra/dblp/)",
- default=sys.stdin, type=argparse.FileType('rb'))
- sub_dblp_container.add_argument('--dblp-container-map-file',
+ default=sys.stdin,
+ type=argparse.FileType("rb"),
+ )
+ sub_dblp_container.add_argument(
+ "--dblp-container-map-file",
help="file path to dblp pre-existing prefix to container_id TSV file",
- default=None, type=argparse.FileType('r'))
- sub_dblp_container.add_argument('--dblp-container-map-output',
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_dblp_container.add_argument(
+ "--dblp-container-map-output",
help="file path to output new dblp container map TSV to",
- default=None, type=argparse.FileType('w'))
- sub_dblp_container.add_argument('--issn-map-file',
+ default=None,
+ type=argparse.FileType("w"),
+ )
+ sub_dblp_container.add_argument(
+ "--issn-map-file",
help="ISSN to ISSN-L mapping file",
- default=None, type=argparse.FileType('r'))
- sub_dblp_container.add_argument('--do-updates',
- action='store_true',
- help="update any pre-existing container entities")
+ default=None,
+ type=argparse.FileType("r"),
+ )
+ sub_dblp_container.add_argument(
+ "--do-updates", action="store_true", help="update any pre-existing container entities"
+ )
sub_dblp_container.set_defaults(
func=run_dblp_container,
auth_var="FATCAT_AUTH_WORKER_DBLP",
)
- sub_file_meta = subparsers.add_parser('file-meta',
- help="simple update-only importer for file metadata")
+ sub_file_meta = subparsers.add_parser(
+ "file-meta", help="simple update-only importer for file metadata"
+ )
sub_file_meta.set_defaults(
func=run_file_meta,
auth_var="FATCAT_API_AUTH_TOKEN",
)
- sub_file_meta.add_argument('json_file',
+ sub_file_meta.add_argument(
+ "json_file",
help="File with jsonlines from file_meta schema to import from",
- default=sys.stdin, type=argparse.FileType('r'))
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
- sub_fileset = subparsers.add_parser('fileset',
- help="generic fileset importer")
+ sub_fileset = subparsers.add_parser("fileset", help="generic fileset importer")
sub_fileset.set_defaults(
func=run_fileset,
auth_var="FATCAT_API_AUTH_TOKEN",
)
- sub_fileset.add_argument('json_file',
+ sub_fileset.add_argument(
+ "json_file",
help="File with jsonlines of fileset entities to import",
- default=sys.stdin, type=argparse.FileType('r'))
- sub_fileset.add_argument('--skip-release-fileset-check',
- action='store_true',
- help="create without checking if releases already have related filesets")
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_fileset.add_argument(
+ "--skip-release-fileset-check",
+ action="store_true",
+ help="create without checking if releases already have related filesets",
+ )
args = parser.parse_args()
if not args.__dict__.get("func"):
@@ -889,15 +1118,18 @@ def main():
# allow editgroup description override via env variable (but CLI arg takes
# precedence)
- if not args.editgroup_description_override \
- and os.environ.get('FATCAT_EDITGROUP_DESCRIPTION'):
- args.editgroup_description_override = os.environ.get('FATCAT_EDITGROUP_DESCRIPTION')
+ if not args.editgroup_description_override and os.environ.get(
+ "FATCAT_EDITGROUP_DESCRIPTION"
+ ):
+ args.editgroup_description_override = os.environ.get("FATCAT_EDITGROUP_DESCRIPTION")
args.api = authenticated_api(
args.host_url,
# token is an optional kwarg (can be empty string, None, etc)
- token=os.environ.get(args.auth_var))
+ token=os.environ.get(args.auth_var),
+ )
args.func(args)
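
The dispatch above relies on each subparser's set_defaults(func=..., auth_var=...): the parsed namespace carries both its handler and the name of the environment variable holding its auth token, which main() resolves before calling args.func(args). A stripped-down sketch of the same wiring, with a placeholder handler:

import argparse
import os


def run_demo(args: argparse.Namespace) -> None:
    # placeholder handler; a real one would construct an importer and a pusher
    print("token present:", args.token is not None)


parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
sub = subparsers.add_parser("demo")
sub.set_defaults(func=run_demo, auth_var="FATCAT_API_AUTH_TOKEN")

args = parser.parse_args(["demo"])
args.token = os.environ.get(args.auth_var)  # may be None; token is optional
args.func(args)
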
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()
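
For reference, a typical invocation of the reformatted script might look like the following; paths and the token value are placeholders, and the env var name comes from the crossref subcommand's auth_var:

# illustrative invocation, not from the repo
FATCAT_AUTH_WORKER_CROSSREF=xxx ./fatcat_import.py \
    --host-url http://localhost:9411/v0 --batch-size 100 \
    crossref /srv/dumps/crossref.json /srv/dumps/ISSN-to-ISSN-L.txt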