diff options
| -rwxr-xr-x | python/fatcat_cleanup.py | 6 | ||||
| -rwxr-xr-x | python/fatcat_import.py | 43 | ||||
| -rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 21 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 5 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 12 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/shadow.py | 5 | ||||
| -rwxr-xr-x | python/fatcat_tools/importers/wayback_static.py | 19 | ||||
| -rw-r--r-- | python/fatcat_web/editing_routes.py | 32 | ||||
| -rw-r--r-- | python/fatcat_web/entity_helpers.py | 21 | ||||
| -rw-r--r-- | python/fatcat_web/ref_routes.py | 24 | ||||
| -rw-r--r-- | python/fatcat_web/routes.py | 77 | ||||
| -rw-r--r-- | python/tests/transform_csl.py | 4 | 
12 files changed, 207 insertions, 62 deletions
| diff --git a/python/fatcat_cleanup.py b/python/fatcat_cleanup.py index 4e11139e..04ee2bd9 100755 --- a/python/fatcat_cleanup.py +++ b/python/fatcat_cleanup.py @@ -1,14 +1,14 @@  #!/usr/bin/env python3 +import argparse  import os  import sys -import argparse +  import raven  from fatcat_tools import authenticated_api +from fatcat_tools.cleanups import FileCleaner  from fatcat_tools.importers import JsonLinePusher -from fatcat_tools.cleanups import * -  # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable  sentry_client = raven.Client() diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 41a51ad4..3225688c 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -1,13 +1,50 @@  #!/usr/bin/env python3 +import argparse  import os  import sys -import argparse +  import raven  from fatcat_tools import authenticated_api -from fatcat_tools.importers import * - +from fatcat_tools.importers import ( +    ARABESQUE_MATCH_WHERE_CLAUSE, +    ArabesqueMatchImporter, +    ArxivRawImporter, +    Bs4XmlFileListPusher, +    Bs4XmlFilePusher, +    Bs4XmlLargeFilePusher, +    Bs4XmlLinesPusher, +    ChoculaImporter, +    CrossrefImporter, +    DataciteImporter, +    DblpContainerImporter, +    DblpReleaseImporter, +    DoajArticleImporter, +    FileMetaImporter, +    FilesetImporter, +    GrobidMetadataImporter, +    IngestFileResultImporter, +    IngestFilesetResultImporter, +    IngestWebResultImporter, +    JalcImporter, +    JournalMetadataImporter, +    JsonLinePusher, +    JstorImporter, +    KafkaBs4XmlPusher, +    KafkaJsonPusher, +    LinePusher, +    MatchedImporter, +    OrcidImporter, +    PubmedImporter, +    SavePaperNowFileImporter, +    SavePaperNowFilesetImporter, +    SavePaperNowWebImporter, +    ShadowLibraryImporter, +    SqlitePusher, +    auto_cdl_dash_dat, +    auto_wayback_static, +)  # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable  sentry_client = raven.Client() diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 36a2f9a6..0340f6a3 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -1,16 +1,25 @@  #!/usr/bin/env python3 -import os -import sys -import json -import magic -import urllib  import hashlib +import json  import mimetypes +import os  import subprocess +import sys +import urllib  import fatcat_openapi_client -from fatcat_openapi_client import * +import magic +from fatcat_openapi_client import ( +    Editgroup, +    FilesetEntity, +    FilesetFile, +    ReleaseAbstract, +    ReleaseContrib, +    ReleaseEntity, +    ReleaseExtIds, +) +  from .common import clean  from .crossref import lookup_license_slug diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index d95c5847..e0e4fc3c 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -1,8 +1,9 @@  import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url  class MatchedImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index c34fd7d6..c9907c5e 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -1,13 +1,15 @@ -import sys -import json  import datetime +import json +import sys  import warnings -from bs4 import BeautifulSoup  import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, clean, LANG_MAP_MARC +from bs4 import BeautifulSoup + +from fatcat_tools.normal import clean_doi, clean_issn, clean_pmcid, clean_pmid + +from .common import LANG_MAP_MARC, EntityImporter, clean  # from: https://www.ncbi.nlm.nih.gov/books/NBK3827/table/pubmedhelp.T.publication_types/?report=objectonly  PUBMED_RELEASE_TYPE_MAP = { diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index c04e9aa8..fa9b4d10 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -1,8 +1,9 @@  import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi, clean_isbn13, clean_pmid + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url  class ShadowLibraryImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py index 5b23347f..196f86ff 100755 --- a/python/fatcat_tools/importers/wayback_static.py +++ b/python/fatcat_tools/importers/wayback_static.py @@ -6,16 +6,23 @@ Helpers to create Web Capture entities from extracted wayback content.  Works as a stand-alone script (for debugging) or as library routines.  """ -import sys -import json -import hashlib -import requests -import datetime  import argparse +import datetime +import hashlib +import json  import subprocess +import sys + +import requests  from bs4 import BeautifulSoup +from fatcat_openapi_client import ( +    ApiClient, +    Editgroup, +    WebcaptureCdxLine, +    WebcaptureEntity, +    WebcaptureUrl, +) -from fatcat_openapi_client import *  from .common import b32_hex  CDX_API_BASE = "https://web.archive.org/cdx/search/cdx" diff --git a/python/fatcat_web/editing_routes.py b/python/fatcat_web/editing_routes.py index 61aade72..d888735a 100644 --- a/python/fatcat_web/editing_routes.py +++ b/python/fatcat_web/editing_routes.py @@ -1,17 +1,31 @@  from typing import Optional -from flask import render_template, abort, redirect, session, flash -from flask_login import login_required - -from fatcat_openapi_client import * +from fatcat_openapi_client import ( +    ContainerEntity, +    CreatorEntity, +    Editgroup, +    EntityEdit, +    FileEntity, +    FilesetEntity, +    ReleaseEntity, +    WebcaptureEntity, +    WorkEntity, +)  from fatcat_openapi_client.rest import ApiException -from fatcat_tools.transforms import * -from fatcat_web import app, api, auth_api -from fatcat_web.search import * -from fatcat_web.forms import * -from fatcat_web.entity_helpers import * +from flask import abort, flash, redirect, render_template, session +from flask_login import login_required +from fatcat_tools.transforms import entity_from_toml +from fatcat_web import api, app, auth_api +from fatcat_web.entity_helpers import generic_get_editgroup_entity, generic_get_entity +from fatcat_web.forms import ( +    ContainerEntityForm, +    EntityEditForm, +    EntityTomlForm, +    FileEntityForm, +    ReleaseEntityForm, +)  ### Helper Methods ########################################################## diff --git a/python/fatcat_web/entity_helpers.py b/python/fatcat_web/entity_helpers.py index ab4c16b5..26371341 100644 --- a/python/fatcat_web/entity_helpers.py +++ b/python/fatcat_web/entity_helpers.py @@ -1,11 +1,26 @@ -from flask import abort -from fatcat_openapi_client import * +from fatcat_openapi_client import ( +    ContainerEntity, +    CreatorEntity, +    FileEntity, +    FilesetEntity, +    ReleaseEntity, +    ReleaseExtIds, +    WebcaptureEntity, +    WorkEntity, +)  from fatcat_openapi_client.rest import ApiException, ApiValueError -from fatcat_tools.transforms import * +from flask import abort + +from fatcat_tools.transforms import ( +    container_to_elasticsearch, +    file_to_elasticsearch, +    release_to_elasticsearch, +)  from fatcat_web import api  from fatcat_web.hacks import strip_extlink_xml, wayback_suffix +  def enrich_container_entity(entity):      if entity.state in ('redirect', 'deleted'):          return entity diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py index 2d8ed413..eed3f1df 100644 --- a/python/fatcat_web/ref_routes.py +++ b/python/fatcat_web/ref_routes.py @@ -5,18 +5,28 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references  import json -from flask import render_template, request, Response, jsonify -from fatcat_openapi_client import * -from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release +from flask import Response, jsonify, render_template, request +from fuzzycat.grobid_unstructured import ( +    grobid_api_process_citation, +    grobid_ref_to_release, +    transform_grobid_ref_xml, +)  from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches -from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits +from fatcat_tools.references import ( +    RefHits, +    enrich_inbound_refs, +    enrich_outbound_refs, +    get_inbound_refs, +    get_outbound_refs, +)  from fatcat_tools.transforms.access import release_access_options  from fatcat_tools.transforms.entities import entity_to_dict -from fatcat_web import app, api +from fatcat_web import api, app  from fatcat_web.cors import crossdomain -from fatcat_web.forms import * -from fatcat_web.entity_helpers import * +from fatcat_web.entity_helpers import generic_get_entity +from fatcat_web.forms import ReferenceMatchForm +  def _refs_web(direction, release_ident=None, work_ident=None, openlibrary_id=None, wikipedia_article=None) -> RefHits:      offset = request.args.get('offset', '0') diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index e9741b6d..9795adf7 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -1,25 +1,74 @@ -import os  import json +import os +  import citeproc_styles -from flask import render_template, make_response, send_from_directory, \ -    request, url_for, abort, redirect, jsonify, session, Response +from fatcat_openapi_client import EditgroupAnnotation +from fatcat_openapi_client.rest import ApiException, ApiValueError +from flask import ( +    Response, +    abort, +    jsonify, +    make_response, +    redirect, +    render_template, +    request, +    send_from_directory, +    session, +    url_for, +)  from flask_login import login_required  from flask_wtf.csrf import CSRFError -from fatcat_openapi_client import EditgroupAnnotation -from fatcat_openapi_client.rest import ApiException -from fatcat_tools.transforms import * -from fatcat_tools.normal import * -from fatcat_web import app, api, auth_api, priv_api, mwoauth, Config -from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth, handle_wmoauth +from fatcat_tools.normal import ( +    clean_arxiv_id, +    clean_doi, +    clean_isbn13, +    clean_issn, +    clean_orcid, +    clean_pmcid, +    clean_sha1, +    clean_sha256, +) +from fatcat_tools.transforms import citeproc_csl, release_to_csl +from fatcat_web import Config, api, app, auth_api, mwoauth, priv_api +from fatcat_web.auth import ( +    handle_ia_xauth, +    handle_logout, +    handle_token_login, +    handle_wmoauth, +    load_user, +)  from fatcat_web.cors import crossdomain -from fatcat_web.search import * -from fatcat_web.entity_helpers import * -from fatcat_web.graphics import * -from fatcat_web.kafka import * +from fatcat_web.entity_helpers import ( +    generic_get_editgroup_entity, +    generic_get_entity, +    generic_get_entity_revision, +)  from fatcat_web.forms import SavePaperNowForm - +from fatcat_web.graphics import ( +    ia_coverage_histogram, +    preservation_by_date_histogram, +    preservation_by_volume_histogram, +    preservation_by_year_histogram, +) +from fatcat_web.kafka import kafka_pixy_produce +from fatcat_web.search import ( +    FatcatSearchError, +    GenericQuery, +    ReleaseQuery, +    do_container_search, +    do_release_search, +    get_elastic_container_histogram_legacy, +    get_elastic_container_preservation_by_volume, +    get_elastic_container_random_releases, +    get_elastic_container_stats, +    get_elastic_entity_stats, +    get_elastic_preservation_by_date, +    get_elastic_preservation_by_type, +    get_elastic_preservation_by_year, +    get_elastic_search_coverage, +)  ### Generic Entity Views #################################################### diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py index 54e85f91..83266669 100644 --- a/python/tests/transform_csl.py +++ b/python/tests/transform_csl.py @@ -2,8 +2,8 @@  import json  import pytest -from fatcat_openapi_client import * -from fixtures import * +from fatcat_openapi_client import ReleaseEntity +from fixtures import api  from import_crossref import crossref_importer  from fatcat_tools.transforms import citeproc_csl, entity_from_json, release_to_csl | 
