diff options
Diffstat (limited to 'python/fatcat_import.py')
| -rwxr-xr-x | python/fatcat_import.py | 15 | 
1 files changed, 15 insertions, 0 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index ad4de0e2..843685aa 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -166,6 +166,11 @@ def run_grobid_metadata(args):
         bezerk_mode=args.bezerk_mode)
     LinePusher(fmi, args.tsv_file).run()

+def run_shadow_lib(args):
+    fmi = ShadowLibraryImporter(args.api,
+        edit_batch_size=100)
+    JsonLinePusher(fmi, args.json_file).run()
+
 def run_wayback_static(args):
     api = args.api

@@ -473,6 +478,16 @@ def main():
         action='store_true',
         help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")

+    sub_shadow_lib = subparsers.add_parser('shadow-lib',
+        help="create release and file entities based on GROBID PDF metadata extraction")
+    sub_shadow_lib.set_defaults(
+        func=run_shadow_lib,
+        auth_var="FATCAT_AUTH_WORKER_SHADOW",
+    )
+    sub_shadow_lib.add_argument('json_file',
+        help="JSON file to import from (or stdin)",
+        default=sys.stdin, type=argparse.FileType('r'))
+
     sub_wayback_static = subparsers.add_parser('wayback-static',
         help="crude crawl+ingest tool for single-page HTML docs from wayback")
     sub_wayback_static.set_defaults(
