diff options
Diffstat (limited to 'python')
-rwxr-xr-x | python/fatcat_cleanup.py | 6 | ||||
-rwxr-xr-x | python/fatcat_import.py | 43 | ||||
-rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 21 | ||||
-rw-r--r-- | python/fatcat_tools/importers/matched.py | 5 | ||||
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 12 | ||||
-rw-r--r-- | python/fatcat_tools/importers/shadow.py | 5 | ||||
-rwxr-xr-x | python/fatcat_tools/importers/wayback_static.py | 19 | ||||
-rw-r--r-- | python/fatcat_web/editing_routes.py | 32 | ||||
-rw-r--r-- | python/fatcat_web/entity_helpers.py | 21 | ||||
-rw-r--r-- | python/fatcat_web/ref_routes.py | 24 | ||||
-rw-r--r-- | python/fatcat_web/routes.py | 77 | ||||
-rw-r--r-- | python/tests/transform_csl.py | 4 |
12 files changed, 207 insertions, 62 deletions
diff --git a/python/fatcat_cleanup.py b/python/fatcat_cleanup.py index 4e11139e..04ee2bd9 100755 --- a/python/fatcat_cleanup.py +++ b/python/fatcat_cleanup.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 +import argparse import os import sys -import argparse + import raven from fatcat_tools import authenticated_api +from fatcat_tools.cleanups import FileCleaner from fatcat_tools.importers import JsonLinePusher -from fatcat_tools.cleanups import * - # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable sentry_client = raven.Client() diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 41a51ad4..3225688c 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -1,13 +1,50 @@ #!/usr/bin/env python3 +import argparse import os import sys -import argparse + import raven from fatcat_tools import authenticated_api -from fatcat_tools.importers import * - +from fatcat_tools.importers import ( + ARABESQUE_MATCH_WHERE_CLAUSE, + ArabesqueMatchImporter, + ArxivRawImporter, + Bs4XmlFileListPusher, + Bs4XmlFilePusher, + Bs4XmlLargeFilePusher, + Bs4XmlLinesPusher, + ChoculaImporter, + CrossrefImporter, + DataciteImporter, + DblpContainerImporter, + DblpReleaseImporter, + DoajArticleImporter, + FileMetaImporter, + FilesetImporter, + GrobidMetadataImporter, + IngestFileResultImporter, + IngestFilesetResultImporter, + IngestWebResultImporter, + JalcImporter, + JournalMetadataImporter, + JsonLinePusher, + JstorImporter, + KafkaBs4XmlPusher, + KafkaJsonPusher, + LinePusher, + MatchedImporter, + OrcidImporter, + PubmedImporter, + SavePaperNowFileImporter, + SavePaperNowFilesetImporter, + SavePaperNowWebImporter, + ShadowLibraryImporter, + SqlitePusher, + auto_cdl_dash_dat, + auto_wayback_static, +) # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable sentry_client = raven.Client() diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 36a2f9a6..0340f6a3 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -1,16 +1,25 @@ #!/usr/bin/env python3 -import os -import sys -import json -import magic -import urllib import hashlib +import json import mimetypes +import os import subprocess +import sys +import urllib import fatcat_openapi_client -from fatcat_openapi_client import * +import magic +from fatcat_openapi_client import ( + Editgroup, + FilesetEntity, + FilesetFile, + ReleaseAbstract, + ReleaseContrib, + ReleaseEntity, + ReleaseExtIds, +) + from .common import clean from .crossref import lookup_license_slug diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index d95c5847..e0e4fc3c 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -1,8 +1,9 @@ import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url class MatchedImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index c34fd7d6..c9907c5e 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -1,13 +1,15 @@ -import sys -import json import datetime +import json +import sys import warnings -from bs4 import BeautifulSoup import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, clean, LANG_MAP_MARC +from bs4 import BeautifulSoup + +from fatcat_tools.normal import clean_doi, clean_issn, clean_pmcid, clean_pmid + +from .common import LANG_MAP_MARC, EntityImporter, clean # from: https://www.ncbi.nlm.nih.gov/books/NBK3827/table/pubmedhelp.T.publication_types/?report=objectonly PUBMED_RELEASE_TYPE_MAP = { diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index c04e9aa8..fa9b4d10 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -1,8 +1,9 @@ import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi, clean_isbn13, clean_pmid + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url class ShadowLibraryImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py index 5b23347f..196f86ff 100755 --- a/python/fatcat_tools/importers/wayback_static.py +++ b/python/fatcat_tools/importers/wayback_static.py @@ -6,16 +6,23 @@ Helpers to create Web Capture entities from extracted wayback content. Works as a stand-alone script (for debugging) or as library routines. """ -import sys -import json -import hashlib -import requests -import datetime import argparse +import datetime +import hashlib +import json import subprocess +import sys + +import requests from bs4 import BeautifulSoup +from fatcat_openapi_client import ( + ApiClient, + Editgroup, + WebcaptureCdxLine, + WebcaptureEntity, + WebcaptureUrl, +) -from fatcat_openapi_client import * from .common import b32_hex CDX_API_BASE = "https://web.archive.org/cdx/search/cdx" diff --git a/python/fatcat_web/editing_routes.py b/python/fatcat_web/editing_routes.py index 61aade72..d888735a 100644 --- a/python/fatcat_web/editing_routes.py +++ b/python/fatcat_web/editing_routes.py @@ -1,17 +1,31 @@ from typing import Optional -from flask import render_template, abort, redirect, session, flash -from flask_login import login_required - -from fatcat_openapi_client import * +from fatcat_openapi_client import ( + ContainerEntity, + CreatorEntity, + Editgroup, + EntityEdit, + FileEntity, + FilesetEntity, + ReleaseEntity, + WebcaptureEntity, + WorkEntity, +) from fatcat_openapi_client.rest import ApiException -from fatcat_tools.transforms import * -from fatcat_web import app, api, auth_api -from fatcat_web.search import * -from fatcat_web.forms import * -from fatcat_web.entity_helpers import * +from flask import abort, flash, redirect, render_template, session +from flask_login import login_required +from fatcat_tools.transforms import entity_from_toml +from fatcat_web import api, app, auth_api +from fatcat_web.entity_helpers import generic_get_editgroup_entity, generic_get_entity +from fatcat_web.forms import ( + ContainerEntityForm, + EntityEditForm, + EntityTomlForm, + FileEntityForm, + ReleaseEntityForm, +) ### Helper Methods ########################################################## diff --git a/python/fatcat_web/entity_helpers.py b/python/fatcat_web/entity_helpers.py index ab4c16b5..26371341 100644 --- a/python/fatcat_web/entity_helpers.py +++ b/python/fatcat_web/entity_helpers.py @@ -1,11 +1,26 @@ -from flask import abort -from fatcat_openapi_client import * +from fatcat_openapi_client import ( + ContainerEntity, + CreatorEntity, + FileEntity, + FilesetEntity, + ReleaseEntity, + ReleaseExtIds, + WebcaptureEntity, + WorkEntity, +) from fatcat_openapi_client.rest import ApiException, ApiValueError -from fatcat_tools.transforms import * +from flask import abort + +from fatcat_tools.transforms import ( + container_to_elasticsearch, + file_to_elasticsearch, + release_to_elasticsearch, +) from fatcat_web import api from fatcat_web.hacks import strip_extlink_xml, wayback_suffix + def enrich_container_entity(entity): if entity.state in ('redirect', 'deleted'): return entity diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py index 2d8ed413..eed3f1df 100644 --- a/python/fatcat_web/ref_routes.py +++ b/python/fatcat_web/ref_routes.py @@ -5,18 +5,28 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references import json -from flask import render_template, request, Response, jsonify -from fatcat_openapi_client import * -from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release +from flask import Response, jsonify, render_template, request +from fuzzycat.grobid_unstructured import ( + grobid_api_process_citation, + grobid_ref_to_release, + transform_grobid_ref_xml, +) from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches -from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits +from fatcat_tools.references import ( + RefHits, + enrich_inbound_refs, + enrich_outbound_refs, + get_inbound_refs, + get_outbound_refs, +) from fatcat_tools.transforms.access import release_access_options from fatcat_tools.transforms.entities import entity_to_dict -from fatcat_web import app, api +from fatcat_web import api, app from fatcat_web.cors import crossdomain -from fatcat_web.forms import * -from fatcat_web.entity_helpers import * +from fatcat_web.entity_helpers import generic_get_entity +from fatcat_web.forms import ReferenceMatchForm + def _refs_web(direction, release_ident=None, work_ident=None, openlibrary_id=None, wikipedia_article=None) -> RefHits: offset = request.args.get('offset', '0') diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index e9741b6d..9795adf7 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -1,25 +1,74 @@ -import os import json +import os + import citeproc_styles -from flask import render_template, make_response, send_from_directory, \ - request, url_for, abort, redirect, jsonify, session, Response +from fatcat_openapi_client import EditgroupAnnotation +from fatcat_openapi_client.rest import ApiException, ApiValueError +from flask import ( + Response, + abort, + jsonify, + make_response, + redirect, + render_template, + request, + send_from_directory, + session, + url_for, +) from flask_login import login_required from flask_wtf.csrf import CSRFError -from fatcat_openapi_client import EditgroupAnnotation -from fatcat_openapi_client.rest import ApiException -from fatcat_tools.transforms import * -from fatcat_tools.normal import * -from fatcat_web import app, api, auth_api, priv_api, mwoauth, Config -from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth, handle_wmoauth +from fatcat_tools.normal import ( + clean_arxiv_id, + clean_doi, + clean_isbn13, + clean_issn, + clean_orcid, + clean_pmcid, + clean_sha1, + clean_sha256, +) +from fatcat_tools.transforms import citeproc_csl, release_to_csl +from fatcat_web import Config, api, app, auth_api, mwoauth, priv_api +from fatcat_web.auth import ( + handle_ia_xauth, + handle_logout, + handle_token_login, + handle_wmoauth, + load_user, +) from fatcat_web.cors import crossdomain -from fatcat_web.search import * -from fatcat_web.entity_helpers import * -from fatcat_web.graphics import * -from fatcat_web.kafka import * +from fatcat_web.entity_helpers import ( + generic_get_editgroup_entity, + generic_get_entity, + generic_get_entity_revision, +) from fatcat_web.forms import SavePaperNowForm - +from fatcat_web.graphics import ( + ia_coverage_histogram, + preservation_by_date_histogram, + preservation_by_volume_histogram, + preservation_by_year_histogram, +) +from fatcat_web.kafka import kafka_pixy_produce +from fatcat_web.search import ( + FatcatSearchError, + GenericQuery, + ReleaseQuery, + do_container_search, + do_release_search, + get_elastic_container_histogram_legacy, + get_elastic_container_preservation_by_volume, + get_elastic_container_random_releases, + get_elastic_container_stats, + get_elastic_entity_stats, + get_elastic_preservation_by_date, + get_elastic_preservation_by_type, + get_elastic_preservation_by_year, + get_elastic_search_coverage, +) ### Generic Entity Views #################################################### diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py index 54e85f91..83266669 100644 --- a/python/tests/transform_csl.py +++ b/python/tests/transform_csl.py @@ -2,8 +2,8 @@ import json import pytest -from fatcat_openapi_client import * -from fixtures import * +from fatcat_openapi_client import ReleaseEntity +from fixtures import api from import_crossref import crossref_importer from fatcat_tools.transforms import citeproc_csl, entity_from_json, release_to_csl |