summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_import.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-10 13:08:23 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-10 13:08:25 -0800
commit: ab4e1355bf93e3755985f1b5cd2589a78601d253 (patch)
tree: f50ee1492587fead94410e229963b18f88f203a9 /python/fatcat_import.py
parent: c133f3077aa975aa4706a8e5ca894fc1b71fbc67 (diff)
download: fatcat-ab4e1355bf93e3755985f1b5cd2589a78601d253.tar.gz
fatcat-ab4e1355bf93e3755985f1b5cd2589a78601d253.zip
remove cdl_dash_dat and wayback_static importers
Cleaning out dead code. These importers were used to create demonstration fileset and webcapture entities early in development. They have been replaced by the fileset and webcapture ingest importers.
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x python/fatcat_import.py 86
1 files changed, 0 insertions, 86 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 39ef200a..33679868 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -42,8 +42,6 @@ from fatcat_tools.importers import (
SavePaperNowWebImporter,
ShadowLibraryImporter,
SqlitePusher,
- auto_cdl_dash_dat,
- auto_wayback_static,
)
# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
@@ -315,53 +313,6 @@ def run_shadow_lib(args: argparse.Namespace) -> None:
JsonLinePusher(fmi, args.json_file).run()
-def run_wayback_static(args: argparse.Namespace) -> None:
- api = args.api
-
- # find the release
- if args.release_id:
- release_id = args.release_id
- elif args.extid:
- idtype = args.extid.split(":")[0]
- extid = ":".join(args.extid.split(":")[1:])
- if idtype == "doi":
- release_id = api.lookup_release(doi=extid).ident
- elif idtype == "pmid":
- release_id = api.lookup_release(pmid=extid).ident
- elif idtype == "wikidata":
- release_id = api.lookup_release(wikidata_qid=extid).ident
- else:
- raise NotImplementedError("extid type: {}".format(idtype))
- else:
- raise Exception("need either release_id or extid argument")
-
- # create it
- (editgroup_id, wc) = auto_wayback_static(
- api, release_id, args.wayback_url, editgroup_id=args.editgroup_id
- )
- if not wc:
- return
- print("release_id: {}".format(release_id))
- print("editgroup_id: {}".format(editgroup_id))
- print("webcapture id: {}".format(wc.ident))
- print("link: https://fatcat.wiki/webcapture/{}".format(wc.ident))
-
-
-def run_cdl_dash_dat(args: argparse.Namespace) -> None:
- api = args.api
-
- # create it
- (editgroup_id, release, fs) = auto_cdl_dash_dat(
- api, args.dat_path, release_id=args.release_id, editgroup_id=args.editgroup_id
- )
- if not (fs and release):
- return
- print("release_id: {}".format(release.ident))
- print("editgroup_id: {}".format(editgroup_id))
- print("fileset id: {}".format(fs.ident))
- print("link: https://fatcat.wiki/fileset/{}".format(fs.ident))
-
-
def run_datacite(args: argparse.Namespace) -> None:
dci = DataciteImporter(
args.api,
@@ -899,43 +850,6 @@ def main() -> None:
type=argparse.FileType("r"),
)
- sub_wayback_static = subparsers.add_parser(
- "wayback-static", help="crude crawl+ingest tool for single-page HTML docs from wayback"
- )
- sub_wayback_static.set_defaults(
- func=run_wayback_static,
- auth_var="FATCAT_API_AUTH_TOKEN",
- )
- sub_wayback_static.add_argument(
- "wayback_url", type=str, help="URL of wayback capture to extract from"
- )
- sub_wayback_static.add_argument(
- "--extid", type=str, help="external identifier for release lookup"
- )
- sub_wayback_static.add_argument("--release-id", type=str, help="release entity identifier")
- sub_wayback_static.add_argument(
- "--editgroup-id",
- type=str,
- help="use existing editgroup (instead of creating a new one)",
- )
-
- sub_cdl_dash_dat = subparsers.add_parser(
- "cdl-dash-dat", help="crude helper to import datasets from Dat/CDL mirror pilot project"
- )
- sub_cdl_dash_dat.set_defaults(
- func=run_cdl_dash_dat,
- auth_var="FATCAT_API_AUTH_TOKEN",
- )
- sub_cdl_dash_dat.add_argument(
- "dat_path", type=str, help="local path dat to import (must be the dat discovery key)"
- )
- sub_cdl_dash_dat.add_argument("--release-id", type=str, help="release entity identifier")
- sub_cdl_dash_dat.add_argument(
- "--editgroup-id",
- type=str,
- help="use existing editgroup (instead of creating a new one)",
- )
-
sub_datacite = subparsers.add_parser("datacite", help="import datacite.org metadata")
sub_datacite.add_argument(
"json_file",