-rwxr-xr-x  python/fatcat_harvest.py                           |  2
-rw-r--r--  python/fatcat_tools/api_auth.py                    |  1
-rw-r--r--  python/fatcat_tools/harvest/doi_registrars.py      |  3
-rw-r--r--  python/fatcat_tools/harvest/harvest_common.py      |  1
-rw-r--r--  python/fatcat_tools/harvest/oaipmh.py               | 11
-rw-r--r--  python/fatcat_tools/importers/common.py            |  4
-rw-r--r--  python/fatcat_tools/importers/grobid_metadata.py   |  1
-rw-r--r--  python/fatcat_tools/importers/journal_metadata.py  |  3
-rw-r--r--  python/fatcat_tools/workers/changelog.py           |  3
-rw-r--r--  python/fatcat_tools/workers/worker_common.py       |  4
-rw-r--r--  python/fatcat_web/auth.py                          |  3
-rw-r--r--  python/fatcat_web/cors.py                          |  7
-rw-r--r--  python/fatcat_web/routes.py                        |  7
-rw-r--r--  python/fatcat_web/search.py                         | 24
-rwxr-xr-x  python/fatcat_worker.py                            |  3
-rw-r--r--  python/tests/import_matched.py                     |  2
16 files changed, 37 insertions(+), 42 deletions(-)
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index e28c9b08..e76e1b20 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
import sys
-import raven
import argparse
import datetime
+import raven
from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
HarvestDoajJournalWorker
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index c49051f6..3c643cea 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -37,4 +37,3 @@ def authenticated_api(host_uri, token=None):
api.auth_check()
return api
-
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 4a0cb8db..55d85ef9 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -4,9 +4,9 @@ import sys
import csv
import json
import time
-import requests
import itertools
import datetime
+import requests
from pykafka import KafkaClient
from fatcat_tools.workers import most_recent_message
@@ -182,4 +182,3 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
def update_params(self, params, resp):
params['page[number]'] = resp['meta']['page'] + 1
return params
-
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index 6041a36f..f4d74be2 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -121,4 +121,3 @@ class HarvestState:
self.update(msg.value.decode('utf-8'))
c += 1
print("... got {} state update messages, done".format(c))
-
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index abd917e0..923500fc 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -4,9 +4,9 @@ import sys
import csv
import json
import time
-import requests
import itertools
import datetime
+import requests
from pykafka import KafkaClient
import sickle
@@ -102,7 +102,7 @@ class HarvestArxivWorker(HarvestOaiPmhWorker):
"""
def __init__(self, **kwargs):
- super().__init__(**kwargs)
+ super().__init__(**kwargs)
self.endpoint_url = "https://export.arxiv.org/oai2"
self.metadata_prefix = "arXiv"
self.name = "arxiv"
@@ -117,7 +117,7 @@ class HarvestPubmedWorker(HarvestOaiPmhWorker):
"""
def __init__(self, **kwargs):
- super().__init__(**kwargs)
+ super().__init__(**kwargs)
self.endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi"
self.metadata_prefix = "pmc_fm"
self.name = "pubmed"
@@ -132,7 +132,7 @@ class HarvestDoajJournalWorker(HarvestOaiPmhWorker):
"""
def __init__(self, **kwargs):
- super().__init__(**kwargs)
+ super().__init__(**kwargs)
self.endpoint_url = "https://www.doaj.org/oai"
self.metadata_prefix = "oai_dc"
self.name = "doaj-journal"
@@ -144,8 +144,7 @@ class HarvestDoajArticleWorker(HarvestOaiPmhWorker):
"""
def __init__(self, **kwargs):
- super().__init__(**kwargs)
+ super().__init__(**kwargs)
self.endpoint_url = "https://www.doaj.org/oai.article"
self.metadata_prefix = "oai_doaj"
self.name = "doaj-article"
-
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 32f7b4d5..b89c3828 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -111,7 +111,7 @@ def test_make_rel_url():
class EntityImporter:
"""
Base class for fatcat entity importers.
-
+
The API exposed to record iterator is:
push_record(raw_record)
@@ -143,7 +143,7 @@ class EntityImporter:
eg_extra['git_rev'] = eg_extra.get('git_rev',
subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.EntityImporter')
-
+
self.api = api
self.bezerk_mode = kwargs.get('bezerk_mode', False)
self.edit_batch_size = kwargs.get('edit_batch_size', 100)
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index d693bc0a..7281a7a1 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -194,4 +194,3 @@ class GrobidMetadataImporter(EntityImporter):
autoaccept=True,
description=self.editgroup_description,
extra=json.dumps(self.editgroup_extra))
-
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 75ee34d8..9bba2b12 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -118,7 +118,7 @@ class JournalMetadataImporter(EntityImporter):
if existing:
self.counts['exists'] += 1
return False
-
+
return True
def insert_batch(self, batch):
@@ -126,4 +126,3 @@ class JournalMetadataImporter(EntityImporter):
autoaccept=True,
description=self.editgroup_description,
extra=json.dumps(self.editgroup_extra))
-
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 636ed304..39a84f18 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -34,7 +34,7 @@ class ChangelogWorker(FatcatWorker):
self.offset = 1
with topic.get_producer() as producer:
- while True:
+ while True:
latest = int(self.api.get_changelog(limit=1)[0].index)
if latest > self.offset:
print("Fetching changelogs from {} through {}".format(
@@ -101,4 +101,3 @@ class EntityUpdatesWorker(FatcatWorker):
timestamp=None,
)
#consumer.commit_offsets()
-
diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index b84341c7..57fb710c 100644
--- a/python/fatcat_tools/workers/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py
@@ -4,10 +4,11 @@ import sys
import csv
import json
import itertools
-import fatcat_client
from itertools import islice
from pykafka import KafkaClient
from pykafka.common import OffsetType
+
+import fatcat_client
from fatcat_client.rest import ApiException
@@ -51,4 +52,3 @@ class FatcatWorker:
self.kafka = KafkaClient(hosts=kafka_hosts, broker_version="1.0.0")
self.produce_topic = produce_topic
self.consume_topic = consume_topic
-
diff --git a/python/fatcat_web/auth.py b/python/fatcat_web/auth.py
index 03964c92..1953151b 100644
--- a/python/fatcat_web/auth.py
+++ b/python/fatcat_web/auth.py
@@ -4,8 +4,8 @@ import requests
import pymacaroons
from flask import Flask, render_template, send_from_directory, request, \
url_for, abort, g, redirect, jsonify, session, flash
-from fatcat_web import login_manager, api, priv_api, Config
from flask_login import logout_user, login_user, UserMixin
+from fatcat_web import login_manager, api, priv_api, Config
import fatcat_client
def handle_logout():
@@ -141,4 +141,3 @@ def load_user(editor_id):
user.username = editor['username']
user.token = token
return user
-
diff --git a/python/fatcat_web/cors.py b/python/fatcat_web/cors.py
index 2285cf80..d84babd8 100644
--- a/python/fatcat_web/cors.py
+++ b/python/fatcat_web/cors.py
@@ -1,12 +1,13 @@
-from datetime import timedelta
-from flask import make_response, request, current_app
-from functools import update_wrapper
"""
This snippet from: http://flask.pocoo.org/snippets/56/
"Posted by Armin Ronacher on 2011-07-14"
"""
+from datetime import timedelta
+from functools import update_wrapper
+from flask import make_response, request, current_app
+
def crossdomain(origin=None, methods=None, headers=None,
max_age=21600, attach_to_all=True,
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 11f73e4f..693adf25 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -4,12 +4,13 @@ import json
from flask import Flask, render_template, send_from_directory, request, \
url_for, abort, g, redirect, jsonify, session, flash
from flask_login import login_required
+
+from fatcat_client.rest import ApiException
+from fatcat_tools.transforms import *
from fatcat_web import app, api, auth_api, priv_api
from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth
-from fatcat_client.rest import ApiException
-from fatcat_web.search import *
from fatcat_web.cors import crossdomain
-from fatcat_tools.transforms import *
+from fatcat_web.search import *
### Views ###################################################################
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index f10ce406..3b48f49e 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -1,8 +1,4 @@
-import requests
-from flask import abort, flash
-from fatcat_web import app
-
"""
Helpers for doing elasticsearch queries (used in the web interface; not part of
the formal API)
@@ -10,6 +6,10 @@ the formal API)
TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
"""
+import requests
+from flask import abort, flash
+from fatcat_web import app
+
def do_search(index, request, limit=30):
@@ -119,14 +119,14 @@ def get_elastic_entity_stats():
stats = {}
# 2. releases
- # x=> total count
- # x=> total citation records
- # x=> total (paper, chapter, proceeding)
- # x=> with fulltext on web
- # x=> open access
- # x=> not in KBART, in IA
- #
- # Can probably do the above with two queries:
+ # - total count
+ # - total citation records
+ # - total (paper, chapter, proceeding)
+ # - " with fulltext on web
+ # - " open access
+ # - " not in KBART, in IA
+ #
+ # Can do the above with two queries:
# - all releases, aggregate count and sum(ref_count)
# - in-scope works, aggregate count by (fulltext, OA, kbart/ia)
diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py
index f4c2f55d..d9d21c6d 100755
--- a/python/fatcat_worker.py
+++ b/python/fatcat_worker.py
@@ -1,9 +1,10 @@
#!/usr/bin/env python3
import sys
-import raven
import argparse
import datetime
+import raven
+
from fatcat_tools import public_api
from fatcat_tools.workers import ChangelogWorker, EntityUpdatesWorker, ElasticsearchReleaseWorker
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
index 8f694456..72ed068c 100644
--- a/python/tests/import_matched.py
+++ b/python/tests/import_matched.py
@@ -10,7 +10,7 @@ def matched_importer(api):
yield MatchedImporter(api)
# TODO: use API to check that entities actually created...
-def test_matched_importer(matched_importer):
+def test_matched_importer_basic(matched_importer):
with open('tests/files/example_matched.json', 'r') as f:
JsonLinePusher(matched_importer, f).run()