about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers')
-rwxr-xr-x  python/fatcat_tools/importers/cdl_dash_dat.py  21
-rw-r--r--  python/fatcat_tools/importers/matched.py  5
-rw-r--r--  python/fatcat_tools/importers/pubmed.py  12
-rw-r--r--  python/fatcat_tools/importers/shadow.py  5
-rwxr-xr-x  python/fatcat_tools/importers/wayback_static.py  19
5 files changed, 41 insertions, 21 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index 36a2f9a6..0340f6a3 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -1,16 +1,25 @@
#!/usr/bin/env python3
-import os
-import sys
-import json
-import magic
-import urllib
import hashlib
+import json
import mimetypes
+import os
import subprocess
+import sys
+import urllib
import fatcat_openapi_client
-from fatcat_openapi_client import *
+import magic
+from fatcat_openapi_client import (
+ Editgroup,
+ FilesetEntity,
+ FilesetFile,
+ ReleaseAbstract,
+ ReleaseContrib,
+ ReleaseEntity,
+ ReleaseExtIds,
+)
+
from .common import clean
from .crossref import lookup_license_slug
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index d95c5847..e0e4fc3c 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -1,8 +1,9 @@
import fatcat_openapi_client
-from fatcat_tools.normal import *
-from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS
+from fatcat_tools.normal import clean_doi
+
+from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url
class MatchedImporter(EntityImporter):
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index c34fd7d6..c9907c5e 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -1,13 +1,15 @@
-import sys
-import json
import datetime
+import json
+import sys
import warnings
-from bs4 import BeautifulSoup
import fatcat_openapi_client
-from fatcat_tools.normal import *
-from .common import EntityImporter, clean, LANG_MAP_MARC
+from bs4 import BeautifulSoup
+
+from fatcat_tools.normal import clean_doi, clean_issn, clean_pmcid, clean_pmid
+
+from .common import LANG_MAP_MARC, EntityImporter, clean
# from: https://www.ncbi.nlm.nih.gov/books/NBK3827/table/pubmedhelp.T.publication_types/?report=objectonly
PUBMED_RELEASE_TYPE_MAP = {
diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py
index c04e9aa8..fa9b4d10 100644
--- a/python/fatcat_tools/importers/shadow.py
+++ b/python/fatcat_tools/importers/shadow.py
@@ -1,8 +1,9 @@
import fatcat_openapi_client
-from fatcat_tools.normal import *
-from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS
+from fatcat_tools.normal import clean_doi, clean_isbn13, clean_pmid
+
+from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url
class ShadowLibraryImporter(EntityImporter):
diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py
index 5b23347f..196f86ff 100755
--- a/python/fatcat_tools/importers/wayback_static.py
+++ b/python/fatcat_tools/importers/wayback_static.py
@@ -6,16 +6,23 @@ Helpers to create Web Capture entities from extracted wayback content.
Works as a stand-alone script (for debugging) or as library routines.
"""
-import sys
-import json
-import hashlib
-import requests
-import datetime
import argparse
+import datetime
+import hashlib
+import json
import subprocess
+import sys
+
+import requests
from bs4 import BeautifulSoup
+from fatcat_openapi_client import (
+ ApiClient,
+ Editgroup,
+ WebcaptureCdxLine,
+ WebcaptureEntity,
+ WebcaptureUrl,
+)
-from fatcat_openapi_client import *
from .common import b32_hex
CDX_API_BASE = "https://web.archive.org/cdx/search/cdx"