summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin@archive.org>2020-07-10 21:32:41 +0000
committerMartin Czygan <martin@archive.org>2020-07-10 21:32:41 +0000
commit3c266e07771271241aa8cff3e3199a45109362af (patch)
tree73fa6aedf1bbfeffeac9c94593f5f9c4f2dd645b
parentfdf1028c19b0623e30b91e49ffa65ed130dcfdc1 (diff)
parentc9d8550be4bab808c2bad0b0d3642a71075202c0 (diff)
downloadfatcat-3c266e07771271241aa8cff3e3199a45109362af.tar.gz
fatcat-3c266e07771271241aa8cff3e3199a45109362af.zip
datacite: resolve formatting issues in tests
-rw-r--r--extra/bulk_download/README.md40
-rw-r--r--extra/elasticsearch/sql_queries.md8
-rw-r--r--notes/bulk_edits/2020-03-23_jalc.md23
-rw-r--r--notes/cleanup_tasks.txt18
-rw-r--r--notes/example_entities.txt26
-rw-r--r--notes/merge_releases_examples.txt21
-rw-r--r--python/.flake813
-rw-r--r--python/Makefile30
-rw-r--r--python/TODO10
-rwxr-xr-xpython/fatcat_cleanup.py4
-rwxr-xr-xpython/fatcat_export.py6
-rwxr-xr-xpython/fatcat_import.py4
-rwxr-xr-xpython/fatcat_review.py3
-rw-r--r--python/fatcat_tools/api_auth.py4
-rw-r--r--python/fatcat_tools/cleanups/common.py1
-rw-r--r--python/fatcat_tools/cleanups/files.py2
-rw-r--r--python/fatcat_tools/harvest/doi_registrars.py7
-rw-r--r--python/fatcat_tools/harvest/harvest_common.py8
-rw-r--r--python/fatcat_tools/harvest/oaipmh.py10
-rw-r--r--python/fatcat_tools/harvest/pubmed.py2
-rw-r--r--python/fatcat_tools/importers/arabesque.py7
-rw-r--r--python/fatcat_tools/importers/arxiv.py9
-rwxr-xr-xpython/fatcat_tools/importers/cdl_dash_dat.py4
-rw-r--r--python/fatcat_tools/importers/chocula.py3
-rw-r--r--python/fatcat_tools/importers/common.py26
-rw-r--r--python/fatcat_tools/importers/crossref.py8
-rw-r--r--python/fatcat_tools/importers/datacite.py3
-rw-r--r--python/fatcat_tools/importers/grobid_metadata.py2
-rw-r--r--python/fatcat_tools/importers/ingest.py7
-rw-r--r--python/fatcat_tools/importers/jalc.py3
-rw-r--r--python/fatcat_tools/importers/journal_metadata.py3
-rw-r--r--python/fatcat_tools/importers/jstor.py2
-rw-r--r--python/fatcat_tools/importers/matched.py8
-rw-r--r--python/fatcat_tools/importers/orcid.py4
-rw-r--r--python/fatcat_tools/importers/pubmed.py6
-rw-r--r--python/fatcat_tools/importers/shadow.py5
-rw-r--r--python/fatcat_tools/kafka.py2
-rw-r--r--python/fatcat_tools/normal.py1
-rw-r--r--python/fatcat_tools/reviewers/review_common.py7
-rw-r--r--python/fatcat_tools/transforms/csl.py14
-rw-r--r--python/fatcat_tools/transforms/elasticsearch.py12
-rw-r--r--python/fatcat_tools/transforms/entities.py1
-rw-r--r--python/fatcat_tools/transforms/ingest.py1
-rw-r--r--python/fatcat_tools/workers/changelog.py1
-rw-r--r--python/fatcat_tools/workers/elasticsearch.py2
-rw-r--r--python/fatcat_tools/workers/worker_common.py9
-rwxr-xr-xpython/fatcat_transform.py10
-rwxr-xr-xpython/fatcat_util.py7
-rw-r--r--python/fatcat_web/auth.py6
-rw-r--r--python/fatcat_web/editing_routes.py22
-rw-r--r--python/fatcat_web/forms.py5
-rw-r--r--python/fatcat_web/graphics.py1
-rw-r--r--python/fatcat_web/routes.py7
-rw-r--r--python/fatcat_web/search.py3
-rw-r--r--python/fatcat_web/web_config.py1
-rwxr-xr-xpython/fatcat_worker.py1
-rw-r--r--python/shell.py1
-rw-r--r--python/tests/api_annotations.py5
-rw-r--r--python/tests/api_containers.py8
-rw-r--r--python/tests/api_creators.py7
-rw-r--r--python/tests/api_editgroups.py3
-rw-r--r--python/tests/api_editor.py9
-rw-r--r--python/tests/api_entity_editing.py4
-rw-r--r--python/tests/api_files.py7
-rw-r--r--python/tests/api_filesets.py6
-rw-r--r--python/tests/api_misc.py6
-rw-r--r--python/tests/api_releases.py6
-rw-r--r--python/tests/api_webcaptures.py5
-rw-r--r--python/tests/citation_efficiency.py6
-rw-r--r--python/tests/clean_files.py3
-rw-r--r--python/tests/fixtures.py5
-rw-r--r--python/tests/harvest_crossref.py1
-rw-r--r--python/tests/harvest_datacite.py1
-rw-r--r--python/tests/harvest_pubmed.py6
-rw-r--r--python/tests/harvest_state.py2
-rw-r--r--python/tests/import_arabesque.py3
-rw-r--r--python/tests/import_arxiv.py6
-rw-r--r--python/tests/import_crossref.py6
-rw-r--r--python/tests/import_datacite.py7
-rw-r--r--python/tests/import_grobid_metadata.py3
-rw-r--r--python/tests/import_ingest.py1
-rw-r--r--python/tests/import_jalc.py6
-rw-r--r--python/tests/import_journal_metadata.py3
-rw-r--r--python/tests/import_jstor.py6
-rw-r--r--python/tests/import_matched.py3
-rw-r--r--python/tests/import_orcid.py3
-rw-r--r--python/tests/import_pubmed.py7
-rw-r--r--python/tests/import_shadow.py4
-rw-r--r--python/tests/importer.py5
-rw-r--r--python/tests/subentity_state.py6
-rw-r--r--python/tests/tools_api.py1
-rw-r--r--python/tests/transform_csl.py4
-rw-r--r--python/tests/transform_elasticsearch.py6
-rw-r--r--python/tests/transform_ingest.py5
-rw-r--r--python/tests/web_auth.py3
-rw-r--r--python/tests/web_citation_csl.py2
-rw-r--r--python/tests/web_editgroup.py5
-rw-r--r--python/tests/web_editing.py4
-rw-r--r--python/tests/web_editor.py4
-rw-r--r--python/tests/web_entity_views.py4
-rw-r--r--python/tests/web_routes.py4
-rw-r--r--python/tests/web_search.py3
-rw-r--r--rust/Makefile2
103 files changed, 319 insertions, 341 deletions
diff --git a/extra/bulk_download/README.md b/extra/bulk_download/README.md
new file mode 100644
index 00000000..83b92fd9
--- /dev/null
+++ b/extra/bulk_download/README.md
@@ -0,0 +1,40 @@
+
+## Download Fatcat Fulltext from web.archive.org in Bulk
+
+These quick-and-dirty directions use UNIX utilities to download from the
+Internet Archive (either in the wayback machine or archive.org). To make a
+proper mirror (eg, for research or preservation use), you would want to verify
+hashes (fixity), handle additional retries, handle files which are not
+preserved in Internet Archive, retain linkage between files and fatcat
+identifiers, etc.
+
+You can download a file entity dump from the most recent "Bulk Metadata Export"
+item from the [snapshots and exports collection](https://archive.org/details/fatcat_snapshots_and_exports?sort=-publicdate).
+
+Create a TSV file containing the SHA1 and a single URL for each file
+entity:
+
+ zcat file_export.json.gz \
+ | grep '"application/pdf"' \
+ | jq -cr '.sha1 as $sha1 | .urls | map(select((.url | startswith("https://web.archive.org/web/")) or (.url | startswith("https://archive.org/download/")))) | select(. != []) | [$sha1, .[0].url] | @tsv' \
+ > fatcat_files_sha1_iaurl.tsv
+
+Then use the GNU `parallel` command to call `curl` in parallel to fetch files.
+The `-j` argument controls parallelism. Please don't create excessive load on
+Internet Archive infrastructure by downloading with too many threads. 10
+parallel threads is a decent amount of load.
+
+ cat fatcat_files_sha1_iaurl.tsv \
+ | awk '{print "curl -Lfs --write-out \"%{http_code}\\t" $1 "\\t%{url_effective}\\n\" \"" $2 "\" -o ", $1 ".pdf"}' \
+ | parallel --bar -j4 {} \
+ > fetch_status.log
+
+This will write out a status log containing the HTTP status code, expected file
+SHA1, and attempted URL. You can check for errors (and potentially retry) with:
+
+ grep -v "^200" fetch_status.log
+
+Or, count status codes:
+
+ cut -f1 fetch_status.log | sort | uniq -c | sort -nr
+
diff --git a/extra/elasticsearch/sql_queries.md b/extra/elasticsearch/sql_queries.md
new file mode 100644
index 00000000..3ea168e5
--- /dev/null
+++ b/extra/elasticsearch/sql_queries.md
@@ -0,0 +1,8 @@
+
+Top missing OA journals by `container_id`:
+
+ POST _xpack/sql?format=txt
+ {
+ "query": "SELECT container_id, count(*) from fatcat_release WHERE preservation = 'none' AND is_oa = true GROUP BY container_id ORDER BY count(*) DESC LIMIT 20"
+ }
+
diff --git a/notes/bulk_edits/2020-03-23_jalc.md b/notes/bulk_edits/2020-03-23_jalc.md
new file mode 100644
index 00000000..d63c3759
--- /dev/null
+++ b/notes/bulk_edits/2020-03-23_jalc.md
@@ -0,0 +1,23 @@
+
+2019-10-01 JaLC metadata snapshot: <https://archive.org/download/jalc-bulk-metadata-2019>
+
+Extracted .rdf file instead of piping it through zcat.
+
+Use correct bot:
+
+ export FATCAT_AUTH_WORKER_JALC=blah
+
+Start small; do a random bunch (10k) single-threaded to pre-create containers:
+
+ head -n100 /srv/fatcat/datasets/JALC-LOD-20191001.rdf | ./fatcat_import.py --batch-size 100 jalc - /srv/fatcat/datasets/ISSN-to-ISSN-L.txt
+ shuf -n100 /srv/fatcat/datasets/JALC-LOD-20191001.rdf | ./fatcat_import.py --batch-size 100 jalc - /srv/fatcat/datasets/ISSN-to-ISSN-L.txt
+ shuf -n10000 /srv/fatcat/datasets/JALC-LOD-20191001.rdf | ./fatcat_import.py --batch-size 100 jalc - /srv/fatcat/datasets/ISSN-to-ISSN-L.txt
+
+Seemed like lots of individual containers getting added after repeating, so
+just going to import single-threaded to avoid duplicate container creation:
+
+ cat /srv/fatcat/datasets/JALC-LOD-20191001.rdf | ./fatcat_import.py --batch-size 100 jalc - /srv/fatcat/datasets/ISSN-to-ISSN-L.txt
+ => Counter({'total': 8419745, 'exists': 6480683, 'insert': 1934082, 'skip': 4980, 'inserted.container': 134, 'update': 0})
+
+Had a bit fewer than 4,568,120 "doi_registrar:jalc" releases before this
+import, 6,502,202 after (based on `doi_registrar:jalc` query).
diff --git a/notes/cleanup_tasks.txt b/notes/cleanup_tasks.txt
new file mode 100644
index 00000000..bf418e59
--- /dev/null
+++ b/notes/cleanup_tasks.txt
@@ -0,0 +1,18 @@
+
+Cambridge Chemical Database (NCI)
+
+ doi_prefix:10.3406 release_type:article
+
+ 193,346+ entities
+
+ should be 'dataset' not 'article'
+
+ datacite importer
+
+Frontiers
+
+ Frontiers non-PDF abstracts, which have DOIs like `10.3389/conf.*`. Should
+ crawl these, but `release_type` should be... `abstract`? There are at least
+ 18,743 of these. Should be fixed both in crossref-bot and with a retroactive
+ cleanup.
+
diff --git a/notes/example_entities.txt b/notes/example_entities.txt
new file mode 100644
index 00000000..416da610
--- /dev/null
+++ b/notes/example_entities.txt
@@ -0,0 +1,26 @@
+
+errata/update:
+ Fourth Test of General Relativity: Preliminary Results
+ 10.1103/physrevlett.20.1265
+ 10.1103/physrevlett.21.266.3
+
+ same title; later is errata to the first.
+ very minor: The term "baud length" was consistently misprinted as "band length."
+
+DOIs for individual images
+ https://commons.wikimedia.org/wiki/Category:Media_from_Williams_et_al._2010_-_10.1371/journal.pone.0010676
+
+long-tail journal not in fatcat; web-native, tricky to crawl
+ https://angryoldmanmagazine.com/
+
+dataset
+ "ISSN-Matching of Gold OA Journals (ISSN-GOLD-OA) 2.0"
+ https://pub.uni-bielefeld.de/data/2913654
+ 2 files
+ has DOI: 10.4119/unibi/2913654
+
+release group; single PDF is valid copy of two DOIs:
+ https://fatcat.wiki/file/wr64e37yvfcidgbowtslx7omne
+ 10.5167/uzh-146424
+ 10.1016/j.physletb.2017.12.006
+ ALSO: has CC-BY license_slug
diff --git a/notes/merge_releases_examples.txt b/notes/merge_releases_examples.txt
new file mode 100644
index 00000000..ca65705e
--- /dev/null
+++ b/notes/merge_releases_examples.txt
@@ -0,0 +1,21 @@
+
+https://fatcat.wiki/release/search?q=Validation+of+middle-atmospheric+campaign-based+water+vapour+measured+by+the+ground-based+microwave+radiometer
+
+ 4 releases, all dois. 3x have same author list, 1 same authors different order
+
+https://fatcat.wiki/release/search?q=Perspectives+and+pregnancy+outcomes+of+maternal+Ramadan+fasting+in+the+second+trimester+of+pregnancy
+
+ 6 releases:
+ 2 figshare article
+ 2 figshare files
+ 1 primary
+ 1 correction
+
+https://figshare.com/articles/Plasmodium_falciparum_evades_innate_immunity_by_hybrid_ABO_blood_group_phenotype_formation/8208689/119
+
+ 119 versions (!)
+
+https://fatcat.wiki/release/search?q=NeuroTrends+Visualization
+
+ 45 versions across two figshare works
+
diff --git a/python/.flake8 b/python/.flake8
new file mode 100644
index 00000000..34f6131c
--- /dev/null
+++ b/python/.flake8
@@ -0,0 +1,13 @@
+[flake8]
+# TODO: ANN for better annotation coverage
+select = C,E,F,W
+# The ignores starting with "E251" should be removed after using 'black'
+ignore = F405,F403,W503,E231,E203,E501,E226,E711,E713,E265,ANN101,ANN204,ANN102,E251,E128,E302,E261,E241,E201,E202,E266,E124,E305,E225,W504,E123,E122,E125,E121,E129,E126,E712,W191,E101
+# TODO: should reduce max-complexity
+max-complexity = 50
+exclude = .git,__pycache__,.venv
+max-line-length = 120
+per-file-ignores =
+ */__init__.py: F401
+ tests/*.py: F401,F811
+ tests/transform_csl.py: W291
diff --git a/python/Makefile b/python/Makefile
index 182bc739..4c8ff45f 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -6,14 +6,34 @@ SHELL = /bin/bash
help: ## Print info about all commands
@echo "Commands:"
@echo
- @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[01;32m%-20s\033[0m %s\n", $$1, $$2}'
+ @grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[01;32m%-20s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: dep
+dep: ## Create local virtualenv using pipenv
+ pipenv install --dev
+
+.PHONY: lint
+lint: ## Run lints (eg, flake8, mypy)
+ pipenv run flake8 *.py tests/ fatcat_web/ fatcat_tools/ --select=E9,F63,F7,F82
+ pipenv run flake8 *.py tests/ fatcat_web/ fatcat_tools/ --exit-zero
+
+.PHONY: mypy
+mypy: ## Run mypy type checks (not part of regular lint yet)
+ pipenv run mypy *.py fatcat_web/ fatcat_tools/ --ignore-missing-imports
+
+# Not ready for 'black' yet
+#.PHONY: fmt
+#fmt: ## Run code formatting on all source code
+# pipenv run black *.py fatcat_web/ fatcat_tools/ tests/
.PHONY: test
-test: ## Run all tests and lints
- curl --silent localhost:9411/v0/changelog > /dev/null || (echo "API not running locally, bailing early from tests" && exit 1)
+test: lint ## Run all tests and lints
+ @curl --silent localhost:9411/v0/changelog > /dev/null || (echo "API not running locally, bailing early from tests" && exit 1)
pipenv run pytest
- pipenv run pylint -j 0 -E fatcat*.py fatcat_tools fatcat_web tests/*.py
- pipenv run flake8 tests/ fatcat_web/ fatcat_tools/ *.py --count --select=E9,F63,F7,F82 --show-source --statistics
+
+.PHONY: coverage
+coverage: ## Run all tests with coverage
+ pipenv run pytest --cov
.PHONY: test-cli
test-cli: ## Run CLI commands. WARNING: may mutate local database
diff --git a/python/TODO b/python/TODO
index fdb72849..52b2b8fe 100644
--- a/python/TODO
+++ b/python/TODO
@@ -1,4 +1,14 @@
+improve argparse usage
+ change --host-url to --fatcat-api-url
+ add 'help=' to all CLI sub-commands; improves --help output
+ do ArgumentDefaultsHelpFormatter everywhere
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+Try stubgen for type annotation::
+ stubgen -m fatcat_openapi_client -o stubs/
+ stubgen -p fatcat_openapi_client -o stubs/
+
- schema.org metadata for releases
additional tests
diff --git a/python/fatcat_cleanup.py b/python/fatcat_cleanup.py
index d8b2aea2..4e11139e 100755
--- a/python/fatcat_cleanup.py
+++ b/python/fatcat_cleanup.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
-import os, sys, argparse
+import os
+import sys
+import argparse
import raven
from fatcat_tools import authenticated_api
diff --git a/python/fatcat_export.py b/python/fatcat_export.py
index 5419e46c..763c217e 100755
--- a/python/fatcat_export.py
+++ b/python/fatcat_export.py
@@ -11,11 +11,7 @@ import sys
import json
import argparse
-import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
-from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry
-from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \
- public_api
+from fatcat_tools import uuid2fcid, entity_to_dict, public_api
def run_export_releases(args):
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 331cf791..252ab3a5 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
-import os, sys, argparse
+import os
+import sys
+import argparse
import raven
from fatcat_tools import authenticated_api
diff --git a/python/fatcat_review.py b/python/fatcat_review.py
index 1d1db9a5..a10fc34b 100755
--- a/python/fatcat_review.py
+++ b/python/fatcat_review.py
@@ -2,11 +2,10 @@
import sys
import argparse
-import datetime
import raven
from fatcat_tools import authenticated_api
-from fatcat_tools.reviewers import DummyReviewBot, ReviewBot
+from fatcat_tools.reviewers import DummyReviewBot
# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
sentry_client = raven.Client()
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index add03399..13310120 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -1,7 +1,7 @@
-import os, sys
+import os
+import sys
import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
def public_api(host_uri):
diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py
index 47607cf1..04e6ade4 100644
--- a/python/fatcat_tools/cleanups/common.py
+++ b/python/fatcat_tools/cleanups/common.py
@@ -5,7 +5,6 @@ import subprocess
from collections import Counter
from fatcat_openapi_client import ApiClient, Editgroup
-from fatcat_openapi_client.rest import ApiException
from fatcat_tools.transforms import entity_from_dict, entity_to_dict
diff --git a/python/fatcat_tools/cleanups/files.py b/python/fatcat_tools/cleanups/files.py
index ec7e9064..a40e4a28 100644
--- a/python/fatcat_tools/cleanups/files.py
+++ b/python/fatcat_tools/cleanups/files.py
@@ -1,7 +1,6 @@
from fatcat_openapi_client.rest import ApiException
from fatcat_openapi_client.models import FileEntity
-from fatcat_tools.transforms import entity_to_dict, entity_from_json
from .common import EntityCleaner
@@ -70,4 +69,3 @@ class FileCleaner(EntityCleaner):
self.api.update_file(self.get_editgroup_id(), entity.ident, entity)
return 1
-
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 37628f09..2554fe96 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -1,16 +1,10 @@
-import re
import sys
-import csv
import json
import time
-import itertools
-import datetime
-import requests
from confluent_kafka import Producer, KafkaException
from urllib.parse import urlparse, parse_qs
-from fatcat_tools.workers import most_recent_message
from .harvest_common import HarvestState, requests_retry_session
@@ -64,7 +58,6 @@ class HarvestCrossrefWorker:
to be careful how state is serialized back into kafka.
"""
-
def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
api_host_url="https://api.crossref.org/works", start_date=None,
end_date=None):
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index 27ab8b4a..bdae3054 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -1,15 +1,13 @@
import sys
import json
-import time
import datetime
import requests
from requests.adapters import HTTPAdapter
# unclear why pylint chokes on this import. Recent 'requests' and 'urllib3' are
# in Pipenv.lock, and there are no errors in QA
from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error
-from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException, \
- OFFSET_BEGINNING
+from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException
# Used for parsing ISO date format (YYYY-MM-DD)
@@ -130,9 +128,11 @@ class HarvestState:
}).encode('utf-8')
if kafka_topic:
assert(kafka_config)
+
def fail_fast(err, msg):
if err:
raise KafkaException(err)
+
print("Committing status to Kafka: {}".format(kafka_topic), file=sys.stderr)
producer_conf = kafka_config.copy()
producer_conf.update({
@@ -159,9 +159,11 @@ class HarvestState:
return
print("Fetching state from kafka topic: {}".format(kafka_topic), file=sys.stderr)
+
def fail_fast(err, msg):
if err:
raise KafkaException(err)
+
conf = kafka_config.copy()
conf.update({
'group.id': 'dummy_init_group', # should never be committed
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index d30f9507..c4e4a82a 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -1,16 +1,9 @@
-import re
import sys
-import csv
-import json
import time
-import itertools
-import datetime
-import requests
import sickle
from confluent_kafka import Producer, KafkaException
-from fatcat_tools.workers import most_recent_message
from .harvest_common import HarvestState
@@ -31,7 +24,6 @@ class HarvestOaiPmhWorker:
would want something similar operationally. Oh well!
"""
-
def __init__(self, kafka_hosts, produce_topic, state_topic,
start_date=None, end_date=None):
@@ -69,7 +61,7 @@ class HarvestOaiPmhWorker:
})
producer = Producer(producer_conf)
- api = sickle.Sickle(self.endpoint_url)
+ api = sickle.Sickle(self.endpoint_url, max_retries=5, retry_status_codes=[503])
date_str = date.isoformat()
# this dict kwargs hack is to work around 'from' as a reserved python keyword
# recommended by sickle docs
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index f6301b8d..802d31d8 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -19,7 +19,7 @@ import tempfile
import time
import xml.etree.ElementTree as ET
from ftplib import FTP
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urlparse
import dateparser
from bs4 import BeautifulSoup
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index c71b33e9..47a8c4da 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -1,10 +1,6 @@
-import sys
-import json
-import sqlite3
-import itertools
import fatcat_openapi_client
-from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex
+from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex
ARABESQUE_MATCH_WHERE_CLAUSE='WHERE hit = 1 AND identifier IS NOT NULL'
@@ -186,4 +182,3 @@ class ArabesqueMatchImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 719592fc..43325ebc 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
from pylatexenc.latex2text import LatexNodes2Text
import fatcat_openapi_client
-from .common import EntityImporter, clean
+from .common import EntityImporter
from .crossref import lookup_license_slug
@@ -97,7 +97,6 @@ class ArxivRawImporter(EntityImporter):
**kwargs)
self._test_override = False
-
def parse_record(self, record):
if not record:
@@ -188,7 +187,6 @@ class ArxivRawImporter(EntityImporter):
if lang == 'en':
lang = None
-
# extra:
# withdrawn_date
# translation_of
@@ -244,7 +242,7 @@ class ArxivRawImporter(EntityImporter):
For each version, do a lookup by full arxiv_id, and store work/release
id results.
-
+
If a version has a DOI, also do a doi lookup and store that result. If
there is an existing release with both matching, set that as the
existing work. If they don't match, use the full arxiv_id match and
@@ -345,6 +343,7 @@ class ArxivRawImporter(EntityImporter):
print(json.dumps(resp))
#sys.exit(-1)
-if __name__=='__main__':
+
+if __name__ == '__main__':
parser = ArxivRawImporter(None)
parser.parse_file(open(sys.argv[1]))
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index 536c013b..36a2f9a6 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -82,7 +82,7 @@ def cdl_dash_release(meta, extra=None):
#print(abstracts)
if not abstracts:
abstracts = None
-
+
contribs = []
for creator in meta['creator']:
contribs.append(ReleaseContrib(
@@ -120,7 +120,7 @@ def make_release_fileset(dat_path):
with open(dat_path + "/cdl_dash_metadata.json", 'r') as fp:
meta_dict = json.loads(fp.read())
-
+
release = cdl_dash_release(meta_dict)
ark_id = release.extra['ark_id']
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 375b6051..d5d1cce8 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -1,7 +1,4 @@
-import sys
-import json
-import itertools
import fatcat_openapi_client
from .common import EntityImporter, clean
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index eafc6546..c0578224 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -161,18 +161,18 @@ def is_cjk(s):
return False
def test_is_cjk():
- assert is_cjk(None) == False
- assert is_cjk('') == False
- assert is_cjk('blah') == False
- assert is_cjk('岡, 鹿, 梨, 阜, 埼') == True
- assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') == True
- assert is_cjk('菊') == True
- assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') == True
- assert is_cjk('水道') == True
- assert is_cjk('オウ, イク') == True # kanji
- assert is_cjk('ひヒ') == True
- assert is_cjk('き゚ゅ') == True
- assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') == True
+ assert is_cjk(None) is False
+ assert is_cjk('') is False
+ assert is_cjk('blah') is False
+ assert is_cjk('岡, 鹿, 梨, 阜, 埼') is True
+ assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') is True
+ assert is_cjk('菊') is True
+ assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') is True
+ assert is_cjk('水道') is True
+ assert is_cjk('オウ, イク') is True # kanji
+ assert is_cjk('ひヒ') is True
+ assert is_cjk('き゚ゅ') is True
+ assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') is True
DOMAIN_REL_MAP = {
"archive.org": "archive",
@@ -368,7 +368,7 @@ class EntityImporter:
if self._entity_queue:
self.insert_batch(self._entity_queue)
self.counts['insert'] += len(self._entity_queue)
- self._entity_queue = []
+ self._entity_queue = []
return self.counts
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index d26f089f..854e3d9f 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -1,10 +1,6 @@
-import sys
-import json
import sqlite3
import datetime
-import itertools
-import subprocess
import fatcat_openapi_client
from .common import EntityImporter, clean
@@ -425,7 +421,6 @@ class CrossrefImporter(EntityImporter):
release_year = raw_date[0]
release_date = None
-
original_title = None
if obj.get('original-title'):
original_title = clean(obj.get('original-title')[0], force_xml=True)
@@ -500,7 +495,7 @@ class CrossrefImporter(EntityImporter):
if existing:
self.counts['exists'] += 1
return False
-
+
return True
def insert_batch(self, batch):
@@ -509,4 +504,3 @@ class CrossrefImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 962d80c6..6aeb6a68 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -10,7 +10,6 @@ functions (parse_datacite_...), which may help testing.
import collections
import datetime
-import hashlib
import re
import json
import sqlite3
@@ -765,7 +764,7 @@ class DataciteImporter(EntityImporter):
nameType = c.get('nameType', '') or ''
if nameType in ('', 'Personal'):
creator_id = None
- for nid in c.get('nameIdentifiers', []):
+ for nid in c.get('nameIdentifiers', []) or []:
name_scheme = nid.get('nameIdentifierScheme', '') or ''
if not name_scheme.lower() == "orcid":
continue
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 2077eae4..5ec6cc3c 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -1,9 +1,7 @@
#!/usr/bin/env python3
-import sys
import json
import base64
-import datetime
import fatcat_openapi_client
from .common import EntityImporter, clean, make_rel_url
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 2b630e67..4b1d3702 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -1,10 +1,6 @@
-import sys
-import json
-import base64
-import itertools
import fatcat_openapi_client
-from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex
+from .common import EntityImporter, make_rel_url
class IngestFileResultImporter(EntityImporter):
@@ -284,4 +280,3 @@ class SavePaperNowFileImporter(IngestFileResultImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index e30bb233..38aa00eb 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -1,10 +1,7 @@
import sys
-import json
import sqlite3
import datetime
-import itertools
-import subprocess
from bs4 import BeautifulSoup
import fatcat_openapi_client
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index d439c80a..32782eac 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -1,7 +1,4 @@
-import sys
-import json
-import itertools
import fatcat_openapi_client
from .common import EntityImporter, clean
diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py
index 96dbf947..5d35f5e2 100644
--- a/python/fatcat_tools/importers/jstor.py
+++ b/python/fatcat_tools/importers/jstor.py
@@ -183,7 +183,7 @@ class JstorImporter(EntityImporter):
# suspect jan 1st dates get set by JSTOR when actual
# date not known (citation needed), so drop them
release_date = None
-
+
volume = None
if article_meta.volume:
volume = article_meta.volume.string or None
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 180d7ba3..d95c5847 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -1,12 +1,8 @@
-import sys
-import json
-import sqlite3
-import itertools
import fatcat_openapi_client
from fatcat_tools.normal import *
-from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS
+from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS
class MatchedImporter(EntityImporter):
@@ -160,7 +156,6 @@ class MatchedImporter(EntityImporter):
self.counts['skip-update-inflight'] += 1
return False
-
# minimum viable "existing" URL cleanup to fix dupes and broken links:
# remove 'None' wayback URLs, and set archive.org rel 'archive'
existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
@@ -207,4 +202,3 @@ class MatchedImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index 554e052f..21feea9e 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -1,7 +1,5 @@
import sys
-import json
-import itertools
import fatcat_openapi_client
from .common import EntityImporter, clean
@@ -89,7 +87,7 @@ class OrcidImporter(EntityImporter):
if existing:
self.counts['exists'] += 1
return False
-
+
return True
def insert_batch(self, batch):
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 3d3e3a8c..d8a6842c 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -1,11 +1,9 @@
import sys
import json
-import sqlite3
import datetime
import warnings
from bs4 import BeautifulSoup
-from bs4.element import NavigableString
import fatcat_openapi_client
from fatcat_tools.normal import *
@@ -314,7 +312,7 @@ class PubmedImporter(EntityImporter):
Importer for PubMed/MEDLINE XML metadata.
If lookup_refs is true, will do identifer-based lookups for all references.
-
+
TODO: MEDLINE doesn't include PMC/OA license; could include in importer?
"""
@@ -502,7 +500,7 @@ class PubmedImporter(EntityImporter):
ce_edit = self.create_container(ce)
container_id = ce_edit.ident
self._issnl_id_map[issnl] = container_id
-
+
ji = journal.JournalIssue
volume = None
if ji.find("Volume"):
diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py
index 4cd22775..c04e9aa8 100644
--- a/python/fatcat_tools/importers/shadow.py
+++ b/python/fatcat_tools/importers/shadow.py
@@ -1,8 +1,4 @@
-import sys
-import json
-import sqlite3
-import itertools
import fatcat_openapi_client
from fatcat_tools.normal import *
@@ -192,4 +188,3 @@ class ShadowLibraryImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py
index 53b62a37..228de134 100644
--- a/python/fatcat_tools/kafka.py
+++ b/python/fatcat_tools/kafka.py
@@ -1,5 +1,5 @@
-from confluent_kafka import Consumer, Producer, KafkaException
+from confluent_kafka import Producer, KafkaException
def kafka_fail_fast(err, msg):
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index f962ff3c..e65af8d6 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -231,4 +231,3 @@ def test_clean_orcid():
assert clean_orcid("0123-4567-3456-6789 ") == "0123-4567-3456-6789"
assert clean_orcid("01234567-3456-6780") == None
assert clean_orcid("0x23-4567-3456-6780") == None
-
diff --git a/python/fatcat_tools/reviewers/review_common.py b/python/fatcat_tools/reviewers/review_common.py
index ecf7da8f..336a47f6 100644
--- a/python/fatcat_tools/reviewers/review_common.py
+++ b/python/fatcat_tools/reviewers/review_common.py
@@ -1,12 +1,10 @@
-import json
import time
import datetime
import subprocess
from collections import Counter
import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
"""
checks should return:
@@ -132,11 +130,12 @@ class ReviewBot:
status, result_counts[status])
for result in results:
if result.status == status and result.check_type == "editgroup":
- comment += "\n- {description}".format(result.description)
+ comment += "\n- {description}".format(description=result.description)
if result.status == status and result.check_type != "editgroup":
- comment += "\n- {check_type} [{rev}](/{release_type}/rev/{rev}): {description}".format(
+ comment += "\n- {check_type} [{rev}](/{entity_type}/rev/{rev}): {description}".format(
check_type=result.check_type,
rev=result.rev,
+ entity_type=result.check_type,
description=result.description)
extra = self.extra.copy()
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index 832ad6aa..ba199efb 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -1,6 +1,5 @@
import json
-import collections
from citeproc import CitationStylesStyle, CitationStylesBibliography
from citeproc import Citation, CitationItem
@@ -8,8 +7,6 @@ from citeproc import formatter
from citeproc.source.json import CiteProcJSON
from citeproc_styles import get_style_filepath
-from fatcat_openapi_client import ApiClient
-
def contribs_by_role(contribs, role):
ret = [c.copy() for c in contribs if c['role'] == role]
@@ -214,14 +211,13 @@ def citeproc_csl(csl_json, style, html=False):
lines = bib.bibliography()[0]
if style == "bibtex":
out = ""
- for l in lines:
- if l.startswith(" @"):
+ for line in lines:
+ if line.startswith(" @"):
out += "@"
- elif l.startswith(" "):
- out += "\n " + l
+ elif line.startswith(" "):
+ out += "\n " + line
else:
- out += l
+ out += line
return ''.join(out)
else:
return ''.join(lines)
-
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 1d35141b..8ec9c164 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -1,7 +1,5 @@
-import collections
import tldextract
-from fatcat_openapi_client import ApiClient
def check_kbart(year, archive):
@@ -14,11 +12,11 @@ def check_kbart(year, archive):
def test_check_kbart():
- assert check_kbart(1990, dict()) == None
- assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) == False
- assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) == True
- assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) == False
- assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) == True
+ assert check_kbart(1990, dict()) is None
+ assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) is False
+ assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) is True
+ assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) is False
+ assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) is True
def release_to_elasticsearch(entity, force_bool=True):
diff --git a/python/fatcat_tools/transforms/entities.py b/python/fatcat_tools/transforms/entities.py
index ae666413..53455e85 100644
--- a/python/fatcat_tools/transforms/entities.py
+++ b/python/fatcat_tools/transforms/entities.py
@@ -32,4 +32,3 @@ def entity_from_json(json_str, entity_type, api_client=None):
def entity_from_dict(obj, entity_type, api_client=None):
json_str = json.dumps(obj)
return entity_from_json(json_str, entity_type, api_client=api_client)
-
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 22b5154e..2f4e2271 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -61,4 +61,3 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
ingest_request['link_source_id'] = link_source_id
return ingest_request
-
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 3a49f86e..d5891ad1 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -354,4 +354,3 @@ class EntityUpdatesWorker(FatcatWorker):
producer.flush()
# TODO: publish updated 'work' entities to a topic
consumer.store_offsets(message=msg)
-
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index e58b3da1..61854c31 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -1,6 +1,5 @@
import json
-import time
import requests
from confluent_kafka import Consumer, KafkaException
@@ -138,7 +137,6 @@ class ElasticsearchReleaseWorker(FatcatWorker):
consumer.store_offsets(message=msg)
-
class ElasticsearchContainerWorker(ElasticsearchReleaseWorker):
def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None,
diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index ef79f528..8c2936be 100644
--- a/python/fatcat_tools/workers/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py
@@ -1,15 +1,6 @@
-import re
-import sys
-import csv
-import json
-import itertools
-from itertools import islice
from confluent_kafka import Consumer, KafkaException, TopicPartition
-import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
-
def most_recent_message(topic, kafka_config):
"""
diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py
index 14595670..8e01c860 100755
--- a/python/fatcat_transform.py
+++ b/python/fatcat_transform.py
@@ -9,16 +9,8 @@ import sys
import json
import argparse
-from citeproc import CitationStylesStyle, CitationStylesBibliography
-from citeproc import Citation, CitationItem
-from citeproc import formatter
-from citeproc.source.json import CiteProcJSON
-from citeproc_styles import get_style_filepath
-
-import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
from fatcat_openapi_client import ReleaseEntity, ContainerEntity, FileEntity, ChangelogEntry
-from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \
+from fatcat_tools import entity_from_json, \
release_to_elasticsearch, container_to_elasticsearch, \
file_to_elasticsearch, changelog_to_elasticsearch, public_api, \
release_to_csl, citeproc_csl
diff --git a/python/fatcat_util.py b/python/fatcat_util.py
index d6e76697..a45b2ba4 100755
--- a/python/fatcat_util.py
+++ b/python/fatcat_util.py
@@ -8,14 +8,9 @@ TODO:
"""
import sys
-import json
import argparse
-import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
-from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry
-from fatcat_tools import uuid2fcid, fcid2uuid, entity_from_json, \
- entity_to_dict, public_api, authenticated_api
+from fatcat_tools import uuid2fcid, fcid2uuid, authenticated_api
def run_uuid2fcid(args):
diff --git a/python/fatcat_web/auth.py b/python/fatcat_web/auth.py
index 8e26b7fe..ed9f2252 100644
--- a/python/fatcat_web/auth.py
+++ b/python/fatcat_web/auth.py
@@ -2,8 +2,7 @@
from collections import namedtuple
import requests
import pymacaroons
-from flask import Flask, render_template, send_from_directory, request, \
- url_for, abort, g, redirect, jsonify, session, flash
+from flask import render_template, abort, redirect, session, flash
from flask_login import logout_user, login_user, UserMixin
from fatcat_web import login_manager, app, api, priv_api, Config
import fatcat_openapi_client
@@ -141,8 +140,9 @@ def handle_wmoauth(username):
# pass off "as if" we did OAuth successfully
FakeOAuthRemote = namedtuple('FakeOAuthRemote', ['name', 'OAUTH_CONFIG'])
remote = FakeOAuthRemote(name='wikipedia', OAUTH_CONFIG={'api_base_url': "https://www.mediawiki.org/w"})
+ conservative_username = ''.join(filter(str.isalnum, username))
oauth_info = {
- 'preferred_username': username,
+ 'preferred_username': conservative_username,
'iss': "https://www.mediawiki.org/w",
'sub': username,
}
diff --git a/python/fatcat_web/editing_routes.py b/python/fatcat_web/editing_routes.py
index 87223868..44000b1a 100644
--- a/python/fatcat_web/editing_routes.py
+++ b/python/fatcat_web/editing_routes.py
@@ -1,16 +1,11 @@
-import os
-import json
-from flask import Flask, render_template, send_from_directory, request, \
- url_for, abort, g, redirect, jsonify, session, flash, Response
+from flask import render_template, abort, redirect, session, flash
from flask_login import login_required
from fatcat_openapi_client import Editgroup
from fatcat_openapi_client.rest import ApiException
from fatcat_tools.transforms import *
-from fatcat_web import app, api, auth_api, priv_api
-from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth
-from fatcat_web.cors import crossdomain
+from fatcat_web import app, api, auth_api
from fatcat_web.search import *
from fatcat_web.forms import *
from fatcat_web.entity_helpers import *
@@ -20,7 +15,7 @@ from fatcat_web.entity_helpers import *
def form_editgroup_get_or_create(api, edit_form):
"""
- This function expects a submitted, validated
+ This function expects a submitted, validated edit form
"""
if edit_form.editgroup_id.data:
try:
@@ -43,8 +38,10 @@ def form_editgroup_get_or_create(api, edit_form):
app.log.warning(ae)
abort(ae.status)
# set this session editgroup_id
- flash('Started new editgroup <a href="/editgroup/{}">{}</a>' \
- .format(eg.editgroup_id, eg.editgroup_id))
+ flash('Started new editgroup <a href="/editgroup/{}">{}</a>'.format(
+ eg.editgroup_id,
+ eg.editgroup_id,
+ ))
return eg
def generic_entity_edit(editgroup_id, entity_type, existing_ident, edit_template):
@@ -68,7 +65,7 @@ def generic_entity_edit(editgroup_id, entity_type, existing_ident, edit_template
Helpers:
- get_editgroup_revision(editgroup, entity_type, ident) -> None or entity
-
+
TODO: prev_rev interlock
"""
@@ -214,7 +211,7 @@ def generic_edit_delete(editgroup_id, entity_type, edit_id):
# API on behalf of user
user_api = auth_api(session['api_token'])
-
+
# do the deletion
try:
if entity_type == 'container':
@@ -358,4 +355,3 @@ def work_editgroup_edit(editgroup_id, ident):
@app.route('/editgroup/<editgroup_id>/work/edit/<edit_id>/delete', methods=['POST'])
def work_edit_delete(editgroup_id, edit_id):
return abort(404)
-
diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index 377e35aa..15585bf6 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -8,7 +8,7 @@ from flask_wtf import FlaskForm
from wtforms import SelectField, DateField, StringField, IntegerField, \
HiddenField, FormField, FieldList, validators
-from fatcat_openapi_client import ContainerEntity, CreatorEntity, FileEntity, \
+from fatcat_openapi_client import ContainerEntity, FileEntity, \
ReleaseEntity, ReleaseContrib, FileUrl, ReleaseExtIds
release_type_options = [
@@ -293,9 +293,9 @@ class FileUrlForm(FlaskForm):
default='web')
class FileEntityForm(EntityEditForm):
+ # TODO: positive definite
size = IntegerField('Size (bytes)',
[validators.DataRequired()])
- # TODO: positive definite
md5 = StringField("MD5",
[validators.Optional(True),
validators.Length(min=32, max=32)])
@@ -413,4 +413,3 @@ class SavePaperNowForm(FlaskForm):
ingest_request['link_source'] = 'arxiv'
ingest_request['link_source_id'] = release.ext_ids.arxiv
return ingest_request
-
diff --git a/python/fatcat_web/graphics.py b/python/fatcat_web/graphics.py
index fea7eb5a..56852627 100644
--- a/python/fatcat_web/graphics.py
+++ b/python/fatcat_web/graphics.py
@@ -33,4 +33,3 @@ def ia_coverage_histogram(rows):
chart.add('via Fatcat', [y['available'] for y in years])
chart.add('Missing', [y['missing'] for y in years])
return chart
-
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 58f4b7e0..4684f799 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -2,12 +2,12 @@
import os
import sys
import json
-from flask import Flask, render_template, make_response, send_from_directory, \
- request, url_for, abort, g, redirect, jsonify, session, flash, Response
+from flask import render_template, make_response, send_from_directory, \
+ request, url_for, abort, redirect, jsonify, session, flash, Response
from flask_login import login_required
from flask_wtf.csrf import CSRFError
-from fatcat_openapi_client import Editgroup, EditgroupAnnotation
+from fatcat_openapi_client import EditgroupAnnotation
from fatcat_openapi_client.rest import ApiException
from fatcat_tools.transforms import *
from fatcat_tools.normal import *
@@ -1042,4 +1042,3 @@ def robots():
return send_from_directory(os.path.join(app.root_path, 'static'),
'robots.txt',
mimetype='text/plain')
-
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index c1246d22..4a87c735 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -66,7 +66,6 @@ def do_release_search(q, limit=30, fulltext_only=True, offset=0):
if len(q.split()) == 1 and q.startswith("10.") and q.count("/") >= 1:
q = 'doi:"{}"'.format(q)
-
if fulltext_only:
q += " in_web:true"
@@ -297,7 +296,7 @@ def get_elastic_container_random_releases(ident, limit=5):
def get_elastic_container_histogram(ident):
"""
- Fetches a stacked histogram of
+ Fetches a stacked histogram
Filters to the past 500 years (at most), or about 1000 values.
diff --git a/python/fatcat_web/web_config.py b/python/fatcat_web/web_config.py
index 0cb153d6..344f1c2a 100644
--- a/python/fatcat_web/web_config.py
+++ b/python/fatcat_web/web_config.py
@@ -83,4 +83,3 @@ class Config(object):
'fatcat_domain': FATCAT_DOMAIN,
},
}
-
diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py
index 03167a3a..19ac16cd 100755
--- a/python/fatcat_worker.py
+++ b/python/fatcat_worker.py
@@ -2,7 +2,6 @@
import sys
import argparse
-import datetime
import raven
from fatcat_tools import public_api
diff --git a/python/shell.py b/python/shell.py
index c207a325..d53911b9 100644
--- a/python/shell.py
+++ b/python/shell.py
@@ -1,3 +1,4 @@
+# flake8: noqa
# bunch of libraries one might want
import uuid
diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py
index e5566eef..0606b637 100644
--- a/python/tests/api_annotations.py
+++ b/python/tests/api_annotations.py
@@ -1,10 +1,5 @@
-import json
-import pytest
-from copy import copy
-
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
diff --git a/python/tests/api_containers.py b/python/tests/api_containers.py
index 0850fab7..70dbcd7e 100644
--- a/python/tests/api_containers.py
+++ b/python/tests/api_containers.py
@@ -1,10 +1,5 @@
-import json
-import pytest
-from copy import copy
-
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -43,7 +38,7 @@ def test_container(api):
# get redirects (none)
assert api.get_container_redirects(c2.ident) == []
-
+
# delete
eg = quick_eg(api)
api.delete_container(eg.editgroup_id, c2.ident)
@@ -59,4 +54,3 @@ def test_container_examples(api):
c2 = api.lookup_container(issnl=c1.issnl)
assert c1.ident == c2.ident
-
diff --git a/python/tests/api_creators.py b/python/tests/api_creators.py
index 1ce6380a..b271e2b3 100644
--- a/python/tests/api_creators.py
+++ b/python/tests/api_creators.py
@@ -1,10 +1,5 @@
-import json
-import pytest
-from copy import copy
-
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -52,7 +47,7 @@ def test_creators(api):
assert c1.display_name == c3.display_name
assert c1.extra == c3.extra
-
+
# delete
eg = quick_eg(api)
api.delete_creator(eg.editgroup_id, c2.ident)
diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py
index d82c9233..142687c2 100644
--- a/python/tests/api_editgroups.py
+++ b/python/tests/api_editgroups.py
@@ -1,11 +1,8 @@
-import json
import pytest
import datetime
-from copy import copy
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
diff --git a/python/tests/api_editor.py b/python/tests/api_editor.py
index 64bb2759..91881743 100644
--- a/python/tests/api_editor.py
+++ b/python/tests/api_editor.py
@@ -1,12 +1,5 @@
-import json
-import pytest
-import datetime
-from copy import copy
-
-from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
-from fixtures import *
+from fixtures import api
def test_editor_update(api):
diff --git a/python/tests/api_entity_editing.py b/python/tests/api_entity_editing.py
index d5377e18..fee4e34f 100644
--- a/python/tests/api_entity_editing.py
+++ b/python/tests/api_entity_editing.py
@@ -1,10 +1,7 @@
-import json
import pytest
-from copy import copy
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -180,4 +177,3 @@ def test_edit_delete_all(api_dummy_entities):
assert len(eg.edits.webcaptures) == 0
assert len(eg.edits.releases) == 0
assert len(eg.edits.works) == 0
-
diff --git a/python/tests/api_files.py b/python/tests/api_files.py
index 74865daa..65eda993 100644
--- a/python/tests/api_files.py
+++ b/python/tests/api_files.py
@@ -1,10 +1,5 @@
-import json
-import pytest
-from copy import copy
-
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -50,7 +45,7 @@ def test_file(api):
# get redirects (none)
assert api.get_file_redirects(f2.ident) == []
-
+
# delete
eg = quick_eg(api)
api.delete_file(eg.editgroup_id, f2.ident)
diff --git a/python/tests/api_filesets.py b/python/tests/api_filesets.py
index 7f3235cb..6d755744 100644
--- a/python/tests/api_filesets.py
+++ b/python/tests/api_filesets.py
@@ -1,10 +1,7 @@
-import json
import pytest
-from copy import copy
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -64,7 +61,7 @@ def test_fileset(api):
# get redirects (none)
assert api.get_fileset_redirects(fs2.ident) == []
-
+
# delete
eg = quick_eg(api)
api.delete_fileset(eg.editgroup_id, fs2.ident)
@@ -100,4 +97,3 @@ def test_bad_fileset(api):
for b in bad_list:
with pytest.raises(fatcat_openapi_client.rest.ApiException):
api.create_fileset(eg.editgroup_id, b)
-
diff --git a/python/tests/api_misc.py b/python/tests/api_misc.py
index 11f85fd6..4c9ac9a6 100644
--- a/python/tests/api_misc.py
+++ b/python/tests/api_misc.py
@@ -1,10 +1,5 @@
-import json
-import pytest
-from copy import copy
-
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -45,4 +40,3 @@ def test_unexpected_body(api):
)
f1.urls = [dict(url="http://thing", rel="repository", asdf="blue")]
api.create_file(eg.editgroup_id, f1)
-
diff --git a/python/tests/api_releases.py b/python/tests/api_releases.py
index 2df08698..c4c05ea6 100644
--- a/python/tests/api_releases.py
+++ b/python/tests/api_releases.py
@@ -1,11 +1,8 @@
-import json
import pytest
import datetime
-from copy import copy
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -130,7 +127,7 @@ def test_release(api):
# get redirects (none)
assert api.get_release_redirects(r2.ident) == []
-
+
# delete
eg = quick_eg(api)
api.delete_release(eg.editgroup_id, r2.ident)
@@ -210,4 +207,3 @@ def test_controlled_vocab(api):
api.create_release(eg.editgroup_id, r3)
r3.withdrawn_status = "spam"
api.create_release(eg.editgroup_id, r3)
-
diff --git a/python/tests/api_webcaptures.py b/python/tests/api_webcaptures.py
index 1054b41f..85813218 100644
--- a/python/tests/api_webcaptures.py
+++ b/python/tests/api_webcaptures.py
@@ -1,11 +1,8 @@
-import json
import pytest
import datetime
-from copy import copy
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -84,7 +81,7 @@ def test_webcapture(api):
# get redirects (none)
assert api.get_webcapture_redirects(wc2.ident) == []
-
+
# delete
eg = quick_eg(api)
api.delete_webcapture(eg.editgroup_id, wc2.ident)
diff --git a/python/tests/citation_efficiency.py b/python/tests/citation_efficiency.py
index aefb7d15..f8807db6 100644
--- a/python/tests/citation_efficiency.py
+++ b/python/tests/citation_efficiency.py
@@ -1,10 +1,5 @@
-import json
-import pytest
-from copy import copy
-
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -110,4 +105,3 @@ def test_citation_encoding(api):
assert container == r1.refs[0].container_name
assert extra == r1.refs[0].extra
assert locator == r1.refs[0].locator
-
diff --git a/python/tests/clean_files.py b/python/tests/clean_files.py
index 8a87f218..ce1102be 100644
--- a/python/tests/clean_files.py
+++ b/python/tests/clean_files.py
@@ -1,9 +1,10 @@
import copy
import pytest
+
from fatcat_tools.cleanups import FileCleaner
from fatcat_openapi_client import *
-from fixtures import api
+from fixtures import *
@pytest.fixture(scope="function")
diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py
index 78742114..44c7be63 100644
--- a/python/tests/fixtures.py
+++ b/python/tests/fixtures.py
@@ -1,8 +1,4 @@
-import os
-import time
-import json
-import signal
import pytest
from dotenv import load_dotenv
import fatcat_web
@@ -87,4 +83,3 @@ def test_get_changelog_entry(api):
def quick_eg(api_inst):
eg = api_inst.create_editgroup(fatcat_openapi_client.Editgroup())
return eg
-
diff --git a/python/tests/harvest_crossref.py b/python/tests/harvest_crossref.py
index e902cda5..cad0f03b 100644
--- a/python/tests/harvest_crossref.py
+++ b/python/tests/harvest_crossref.py
@@ -1,6 +1,5 @@
import json
-import pytest
import datetime
import responses
from fatcat_tools.harvest import *
diff --git a/python/tests/harvest_datacite.py b/python/tests/harvest_datacite.py
index 004d1fef..13c6042a 100644
--- a/python/tests/harvest_datacite.py
+++ b/python/tests/harvest_datacite.py
@@ -1,6 +1,5 @@
import json
-import pytest
import datetime
import responses
from fatcat_tools.harvest import *
diff --git a/python/tests/harvest_pubmed.py b/python/tests/harvest_pubmed.py
index f8db46b6..58bc4226 100644
--- a/python/tests/harvest_pubmed.py
+++ b/python/tests/harvest_pubmed.py
@@ -2,14 +2,11 @@
Test pubmed FTP harvest.
"""
-import datetime
-import json
import os
-
+import datetime
import pytest
from fatcat_tools.harvest import *
-from fatcat_tools.harvest.pubmed import generate_date_file_map
def test_pubmed_harvest_date(mocker):
@@ -77,4 +74,3 @@ def test_pubmed_harvest_date_no_pmid(mocker):
# The file has not PMID, not importable.
with pytest.raises(ValueError):
harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d'))
-
diff --git a/python/tests/harvest_state.py b/python/tests/harvest_state.py
index 8b7deba6..cc624d97 100644
--- a/python/tests/harvest_state.py
+++ b/python/tests/harvest_state.py
@@ -1,6 +1,4 @@
-import json
-import pytest
import datetime
from fatcat_tools.harvest import *
diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py
index 9483eb45..20cde3a6 100644
--- a/python/tests/import_arabesque.py
+++ b/python/tests/import_arabesque.py
@@ -1,8 +1,9 @@
import json
import pytest
+
from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher
-from fixtures import api
+from fixtures import *
@pytest.fixture(scope="function")
diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py
index 1e649616..9306e67c 100644
--- a/python/tests/import_arxiv.py
+++ b/python/tests/import_arxiv.py
@@ -1,10 +1,10 @@
-import json, gzip
import pytest
-from fatcat_tools.importers import ArxivRawImporter, Bs4XmlFilePusher
-from fixtures import api
from bs4 import BeautifulSoup
+from fatcat_tools.importers import ArxivRawImporter, Bs4XmlFilePusher
+from fixtures import *
+
@pytest.fixture(scope="function")
def arxiv_importer(api):
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index afa2410f..65cd2c37 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -1,8 +1,10 @@
-import json, gzip
+import json
+import gzip
import pytest
+
from fatcat_tools.importers import CrossrefImporter, JsonLinePusher
-from fixtures import api
+from fixtures import *
@pytest.fixture(scope="function")
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index b01a11e6..b94b6bc5 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -2,10 +2,13 @@
Test datacite importer.
"""
-import collections
+import gzip
+import json
import datetime
+import collections
+
import pytest
-import gzip
+
from fatcat_tools.importers import DataciteImporter, JsonLinePusher
from fatcat_tools.importers.datacite import (
find_original_language_title,
diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py
index 51ab3faa..52284b89 100644
--- a/python/tests/import_grobid_metadata.py
+++ b/python/tests/import_grobid_metadata.py
@@ -3,8 +3,9 @@ import os
import json
import base64
import pytest
+
from fatcat_tools.importers import GrobidMetadataImporter, LinePusher
-from fixtures import api
+from fixtures import *
"""
WARNING: these tests are currently very fragile because they have database
diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py
index 02486de6..ebe2923c 100644
--- a/python/tests/import_ingest.py
+++ b/python/tests/import_ingest.py
@@ -1,6 +1,7 @@
import json
import pytest
+
from fatcat_tools.importers import IngestFileResultImporter, JsonLinePusher
from fixtures import *
diff --git a/python/tests/import_jalc.py b/python/tests/import_jalc.py
index f61ec849..ff757e51 100644
--- a/python/tests/import_jalc.py
+++ b/python/tests/import_jalc.py
@@ -1,10 +1,10 @@
-import json, gzip
import pytest
-from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher, Bs4XmlLinesPusher
-from fixtures import api
from bs4 import BeautifulSoup
+from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher, Bs4XmlLinesPusher
+from fixtures import *
+
@pytest.fixture(scope="function")
def jalc_importer(api):
diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py
index cfeee517..51b0a78a 100644
--- a/python/tests/import_journal_metadata.py
+++ b/python/tests/import_journal_metadata.py
@@ -1,7 +1,8 @@
import pytest
+
from fatcat_tools.importers import JournalMetadataImporter, JsonLinePusher
-from fixtures import api
+from fixtures import *
@pytest.fixture(scope="function")
diff --git a/python/tests/import_jstor.py b/python/tests/import_jstor.py
index 019f0aae..8494ffb2 100644
--- a/python/tests/import_jstor.py
+++ b/python/tests/import_jstor.py
@@ -1,10 +1,10 @@
-import json, gzip
import pytest
-from fatcat_tools.importers import JstorImporter, Bs4XmlFilePusher
-from fixtures import api
from bs4 import BeautifulSoup
+from fatcat_tools.importers import JstorImporter, Bs4XmlFilePusher
+from fixtures import *
+
@pytest.fixture(scope="function")
def jstor_importer(api):
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
index 72ed068c..6b61c53c 100644
--- a/python/tests/import_matched.py
+++ b/python/tests/import_matched.py
@@ -1,8 +1,9 @@
import json
import pytest
+
from fatcat_tools.importers import MatchedImporter, JsonLinePusher
-from fixtures import api
+from fixtures import *
@pytest.fixture(scope="function")
diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py
index 57886b52..f78ccde7 100644
--- a/python/tests/import_orcid.py
+++ b/python/tests/import_orcid.py
@@ -1,8 +1,9 @@
import json
import pytest
+
from fatcat_tools.importers import OrcidImporter, JsonLinePusher
-from fixtures import api
+from fixtures import *
@pytest.fixture(scope="function")
diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py
index f57aa273..201f533c 100644
--- a/python/tests/import_pubmed.py
+++ b/python/tests/import_pubmed.py
@@ -1,10 +1,10 @@
-import json, gzip
import pytest
-from fatcat_tools.importers import PubmedImporter, Bs4XmlLargeFilePusher
-from fixtures import api
from bs4 import BeautifulSoup
+from fatcat_tools.importers import PubmedImporter, Bs4XmlLargeFilePusher
+from fixtures import *
+
@pytest.fixture(scope="function")
def pubmed_importer(api):
@@ -137,4 +137,3 @@ def test_pubmed_xml_parse_refs(pubmed_importer):
r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0])
assert len(r1.refs) > 1
-
diff --git a/python/tests/import_shadow.py b/python/tests/import_shadow.py
index 70a918d2..40a1d589 100644
--- a/python/tests/import_shadow.py
+++ b/python/tests/import_shadow.py
@@ -1,8 +1,9 @@
import json
import pytest
+
from fatcat_tools.importers import ShadowLibraryImporter, JsonLinePusher
-from fixtures import api
+from fixtures import *
@pytest.fixture(scope="function")
@@ -58,4 +59,3 @@ def test_shadow_dict_parse(shadow_importer):
assert u.url.startswith("https://web.archive.org/")
assert "20180729135948" in u.url
assert len(f.release_ids) == 1
-
diff --git a/python/tests/importer.py b/python/tests/importer.py
index 9308ba84..a412b247 100644
--- a/python/tests/importer.py
+++ b/python/tests/importer.py
@@ -1,8 +1,6 @@
-
-import pytest
from fatcat_tools.importers import CrossrefImporter, OrcidImporter
-from fixtures import api
+from fixtures import *
def test_issnl_mapping_lookup(api):
@@ -32,4 +30,3 @@ def test_identifiers(api):
assert oi.is_orcid("0000-00x3-3118-659") == False
assert oi.is_orcid("0000-00033118-659") == False
assert oi.is_orcid("0000-0003-3118-659.") == False
-
diff --git a/python/tests/subentity_state.py b/python/tests/subentity_state.py
index 614f88f1..e03fa99e 100644
--- a/python/tests/subentity_state.py
+++ b/python/tests/subentity_state.py
@@ -1,10 +1,5 @@
-import json
-import pytest
-from copy import copy
-
from fatcat_openapi_client import *
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
"""
@@ -221,4 +216,3 @@ def test_app_entity_states(api, app):
assert rv.status_code == 200
rv = app.get('/work/{}'.format(r2.work_id))
assert rv.status_code == 302
-
diff --git a/python/tests/tools_api.py b/python/tests/tools_api.py
index fd26b8ee..a4b5f2ea 100644
--- a/python/tests/tools_api.py
+++ b/python/tests/tools_api.py
@@ -1,6 +1,5 @@
import pytest
-from fatcat_openapi_client import EditgroupAnnotation
from fatcat_openapi_client.rest import ApiException
from fatcat_tools import public_api, authenticated_api
diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py
index 15c64ce5..6436f876 100644
--- a/python/tests/transform_csl.py
+++ b/python/tests/transform_csl.py
@@ -1,11 +1,11 @@
import json
import pytest
+
from fatcat_tools import *
from fatcat_openapi_client import *
-
-from fixtures import api
from import_crossref import crossref_importer
+from fixtures import *
def test_csl_crossref(crossref_importer):
with open('tests/files/crossref-works.single.json', 'r') as f:
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index a954fc4d..f791562c 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -1,13 +1,13 @@
import json
-import pytest
+
from fatcat_tools import *
from fatcat_openapi_client import *
-from fixtures import api
from import_journal_metadata import journal_metadata_importer
-
from import_crossref import crossref_importer
from import_matched import matched_importer
+from fixtures import *
+
def test_basic_elasticsearch_convert(crossref_importer):
with open('tests/files/crossref-works.single.json', 'r') as f:
diff --git a/python/tests/transform_ingest.py b/python/tests/transform_ingest.py
index 2d5652b8..c7044bc0 100644
--- a/python/tests/transform_ingest.py
+++ b/python/tests/transform_ingest.py
@@ -1,12 +1,12 @@
import json
-import pytest
+
from fatcat_tools.transforms import release_ingest_request
from fatcat_openapi_client import *
from fixtures import api
-
from import_crossref import crossref_importer
+
def test_basic_ingest_release(crossref_importer):
with open('tests/files/crossref-works.single.json', 'r') as f:
# not a single line
@@ -54,4 +54,3 @@ def test_rich_ingest_release():
assert ir['base_url'] == 'https://doi.org/10.123/456'
assert ir['ext_ids']['doi'] == '10.123/456'
assert ir['ext_ids'].get('pmcid') is None
-
diff --git a/python/tests/web_auth.py b/python/tests/web_auth.py
index 2c545b6b..643d806e 100644
--- a/python/tests/web_auth.py
+++ b/python/tests/web_auth.py
@@ -1,8 +1,5 @@
-import json
-import pytest
import responses
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
diff --git a/python/tests/web_citation_csl.py b/python/tests/web_citation_csl.py
index e016b2d9..fb3ce58d 100644
--- a/python/tests/web_citation_csl.py
+++ b/python/tests/web_citation_csl.py
@@ -1,8 +1,6 @@
import json
-import tempfile
import pytest
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
diff --git a/python/tests/web_editgroup.py b/python/tests/web_editgroup.py
index cbdd2176..20dc8d93 100644
--- a/python/tests/web_editgroup.py
+++ b/python/tests/web_editgroup.py
@@ -1,9 +1,7 @@
-import json
-import pytest
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
+
def test_editgroup_basics(app):
rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaae')
@@ -59,4 +57,3 @@ def test_editgroup_annotations_login(app_admin):
assert rv.status_code == 200
assert b'Signup' not in rv.data
assert b'Add Comment' in rv.data
-
diff --git a/python/tests/web_editing.py b/python/tests/web_editing.py
index 773a59dd..17f4f5ae 100644
--- a/python/tests/web_editing.py
+++ b/python/tests/web_editing.py
@@ -1,7 +1,4 @@
-import json
-import pytest
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -147,4 +144,3 @@ def test_web_edit_get(app_admin):
rv = app_admin.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/edit')
assert rv.status_code == 200
assert b'1549-1277' in rv.data
-
diff --git a/python/tests/web_editor.py b/python/tests/web_editor.py
index 2614be96..58b21ddf 100644
--- a/python/tests/web_editor.py
+++ b/python/tests/web_editor.py
@@ -1,7 +1,4 @@
-import json
-import pytest
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -25,4 +22,3 @@ def test_change_username(app_admin):
assert rv.status_code == 200
rv = app_admin.get('/auth/account')
assert b'admin-tmp' not in rv.data
-
diff --git a/python/tests/web_entity_views.py b/python/tests/web_entity_views.py
index a3f0f897..c1cbdc29 100644
--- a/python/tests/web_entity_views.py
+++ b/python/tests/web_entity_views.py
@@ -1,7 +1,4 @@
-import json
-import pytest
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
from fatcat_web.forms import ReleaseEntityForm, FileEntityForm, ContainerEntityForm
@@ -367,4 +364,3 @@ def test_web_work(app):
assert rv.status_code == 404
rv = app.get('/work/create')
assert rv.status_code == 404
-
diff --git a/python/tests/web_routes.py b/python/tests/web_routes.py
index 026776ee..0edf06d1 100644
--- a/python/tests/web_routes.py
+++ b/python/tests/web_routes.py
@@ -1,7 +1,4 @@
-import json
-import pytest
-from fatcat_openapi_client.rest import ApiException
from fixtures import *
@@ -13,4 +10,3 @@ def test_static_routes(app):
assert app.get("/search").status_code == 302
assert app.get("/static/bogus/route").status_code == 404
-
diff --git a/python/tests/web_search.py b/python/tests/web_search.py
index 24b817dc..7647bcf5 100644
--- a/python/tests/web_search.py
+++ b/python/tests/web_search.py
@@ -1,8 +1,7 @@
import json
-import pytest
import responses
-from fatcat_openapi_client.rest import ApiException
+
from fixtures import *
@responses.activate
diff --git a/rust/Makefile b/rust/Makefile
index 0abd739e..81fb32af 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -6,7 +6,7 @@ SHELL = /bin/bash
help: ## Print info about all commands
@echo "Commands:"
@echo
- @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[01;32m%-20s\033[0m %s\n", $$1, $$2}'
+ @grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[01;32m%-20s\033[0m %s\n", $$1, $$2}'
.PHONY: test
test: ## Run all tests and lints