diff options
Diffstat (limited to 'python/fatcat_merge.py')
-rwxr-xr-x | python/fatcat_merge.py | 112 |
1 files changed, 112 insertions, 0 deletions
#!/usr/bin/env python3

"""
Tools for merging entities in various ways.

    group-releases: pull all release entities under a single work
        => merges work entities
    merge-releases: merge release entities together
        => groups files/filesets/webcaptures
    merge-containers: merge container entities
    merge-files: merge file entities

Input format is usually JSON lines with keys:

    idents (required): array of string identifiers
    primary (optional): single string identifier

"""

import argparse
import os
import sys

from fatcat_tools import authenticated_api
from fatcat_tools.mergers import *
from fatcat_tools.importers import JsonLinePusher


def _push_json_lines(merger_cls, args):
    """Build a merger of type `merger_cls` and feed it `args.json_file`.

    The merger is constructed with the API client, batch size and dry-run
    flag taken from the parsed command-line namespace, then handed to
    JsonLinePusher together with the open input file.
    """
    merger = merger_cls(
        args.api,
        edit_batch_size=args.batch_size,
        dry_run_mode=args.dry_run,
    )
    JsonLinePusher(merger, args.json_file).run()


def run_group_releases(args):
    """Handler for the 'group-releases' sub-command."""
    _push_json_lines(ReleaseGrouper, args)


def run_merge_releases(args):
    """Handler for the 'merge-releases' sub-command."""
    # The merger built here is the one that gets pushed; previously an
    # undefined name was passed to JsonLinePusher, raising NameError.
    _push_json_lines(ReleaseMerger, args)


def run_merge_containers(args):
    """Handler for the 'merge-containers' sub-command."""
    # NOTE(review): this instantiates ReleaseMerger even though the command
    # is about containers. Looks like a copy/paste slip; confirm whether
    # fatcat_tools.mergers provides a container-specific merger and switch.
    _push_json_lines(ReleaseMerger, args)


def run_merge_files(args):
    """Handler for the 'merge-files' sub-command."""
    _push_json_lines(FileMerger, args)


def main():
    """Parse command-line arguments and dispatch to the chosen sub-command.

    Exits with status -1 when no sub-command is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--host-url',
        default="http://localhost:9411/v0",
        help="connect to this host/port")
    parser.add_argument('--batch-size',
        help="size of batch to send",
        default=50, type=int)
    parser.add_argument('--editgroup-description-override',
        help="editgroup description override",
        default=None, type=str)
    parser.add_argument('--dry-run',
        action='store_true',
        help="don't actually commit merges, just count what would have been")
    parser.set_defaults(
        auth_var="FATCAT_AUTH_API_TOKEN",
    )
    subparsers = parser.add_subparsers()

    # Every sub-command takes the same single positional argument, so
    # register them from one table (order matches the original help output).
    commands = (
        ('group-releases', run_group_releases),
        ('merge-releases', run_merge_releases),
        ('merge-files', run_merge_files),
        ('merge-containers', run_merge_containers),
    )
    for command, handler in commands:
        sub = subparsers.add_parser(command)
        sub.set_defaults(func=handler)
        # nargs='?' makes the positional optional; without it argparse
        # ignores `default` and the advertised stdin fallback never applies.
        sub.add_argument('json_file',
            nargs='?',
            help="source of merge lines to process (or stdin)",
            default=sys.stdin, type=argparse.FileType('r'))

    args = parser.parse_args()
    if not args.__dict__.get("func"):
        print("tell me what to do!")
        sys.exit(-1)

    # allow editgroup description override via env variable (but CLI arg takes
    # precedence)
    # NOTE(review): the resolved value is stored on `args` but is not passed
    # to any merger constructor in this file; confirm whether it should be.
    if not args.editgroup_description_override \
            and os.environ.get('FATCAT_EDITGROUP_DESCRIPTION'):
        args.editgroup_description_override = os.environ.get('FATCAT_EDITGROUP_DESCRIPTION')

    args.api = authenticated_api(
        args.host_url,
        # token is an optional kwarg (can be empty string, None, etc)
        token=os.environ.get(args.auth_var))
    args.func(args)


if __name__ == '__main__':
    main()