Diffstat (limited to 'python')
-rwxr-xr-x  python/fatcat_harvest.py                          |  2
-rw-r--r--  python/fatcat_tools/api_auth.py                   |  1
-rw-r--r--  python/fatcat_tools/harvest/doi_registrars.py     |  3
-rw-r--r--  python/fatcat_tools/harvest/harvest_common.py     |  1
-rw-r--r--  python/fatcat_tools/harvest/oaipmh.py             | 11
-rw-r--r--  python/fatcat_tools/importers/common.py           |  4
-rw-r--r--  python/fatcat_tools/importers/grobid_metadata.py  |  1
-rw-r--r--  python/fatcat_tools/importers/journal_metadata.py |  3
-rw-r--r--  python/fatcat_tools/workers/changelog.py          |  3
-rw-r--r--  python/fatcat_tools/workers/worker_common.py      |  4
-rw-r--r--  python/fatcat_web/auth.py                         |  3
-rw-r--r--  python/fatcat_web/cors.py                         |  7
-rw-r--r--  python/fatcat_web/routes.py                       |  7
-rw-r--r--  python/fatcat_web/search.py                       | 24
-rwxr-xr-x  python/fatcat_worker.py                           |  3
-rw-r--r--  python/tests/import_matched.py                    |  2
16 files changed, 37 insertions, 42 deletions
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index e28c9b08..e76e1b20 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 
 import sys
-import raven
 import argparse
 import datetime
+import raven
 from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
     HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
     HarvestDoajJournalWorker
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index c49051f6..3c643cea 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -37,4 +37,3 @@ def authenticated_api(host_uri, token=None):
 
     api.auth_check()
     return api
-
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 4a0cb8db..55d85ef9 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -4,9 +4,9 @@
 import sys
 import csv
 import json
 import time
-import requests
 import itertools
 import datetime
+import requests
 from pykafka import KafkaClient
 from fatcat_tools.workers import most_recent_message
@@ -182,4 +182,3 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
     def update_params(self, params, resp):
         params['page[number]'] = resp['meta']['page'] + 1
         return params
-
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index 6041a36f..f4d74be2 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -121,4 +121,3 @@ class HarvestState:
             self.update(msg.value.decode('utf-8'))
             c += 1
         print("... got {} state update messages, done".format(c))
-
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index abd917e0..923500fc 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -4,9 +4,9 @@
 import sys
 import csv
 import json
 import time
-import requests
 import itertools
 import datetime
+import requests
 from pykafka import KafkaClient
 import sickle
@@ -102,7 +102,7 @@ class HarvestArxivWorker(HarvestOaiPmhWorker):
     """
 
     def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(**kwargs)
         self.endpoint_url = "https://export.arxiv.org/oai2"
         self.metadata_prefix = "arXiv"
         self.name = "arxiv"
@@ -117,7 +117,7 @@ class HarvestPubmedWorker(HarvestOaiPmhWorker):
     """
 
     def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(**kwargs)
        self.endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi"
         self.metadata_prefix = "pmc_fm"
         self.name = "pubmed"
@@ -132,7 +132,7 @@ class HarvestDoajJournalWorker(HarvestOaiPmhWorker):
     """
 
     def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(**kwargs)
         self.endpoint_url = "https://www.doaj.org/oai"
         self.metadata_prefix = "oai_dc"
         self.name = "doaj-journal"
@@ -144,8 +144,7 @@ class HarvestDoajArticleWorker(HarvestOaiPmhWorker):
     """
 
     def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(**kwargs)
         self.endpoint_url = "https://www.doaj.org/oai.article"
         self.metadata_prefix = "oai_doaj"
         self.name = "doaj-article"
-
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 32f7b4d5..b89c3828 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -111,7 +111,7 @@ def test_make_rel_url():
 class EntityImporter:
     """
     Base class for fatcat entity importers.
-    
+
     The API exposed to record iterator is:
 
         push_record(raw_record)
@@ -143,7 +143,7 @@ class EntityImporter:
         eg_extra['git_rev'] = eg_extra.get('git_rev',
             subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
         eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.EntityImporter')
-        
+
         self.api = api
         self.bezerk_mode = kwargs.get('bezerk_mode', False)
         self.edit_batch_size = kwargs.get('edit_batch_size', 100)
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index d693bc0a..7281a7a1 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -194,4 +194,3 @@ class GrobidMetadataImporter(EntityImporter):
             autoaccept=True,
             description=self.editgroup_description,
             extra=json.dumps(self.editgroup_extra))
-
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 75ee34d8..9bba2b12 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -118,7 +118,7 @@ class JournalMetadataImporter(EntityImporter):
         if existing:
             self.counts['exists'] += 1
             return False
-        
+
         return True
 
     def insert_batch(self, batch):
@@ -126,4 +126,3 @@ class JournalMetadataImporter(EntityImporter):
             autoaccept=True,
             description=self.editgroup_description,
             extra=json.dumps(self.editgroup_extra))
-
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 636ed304..39a84f18 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -34,7 +34,7 @@ class ChangelogWorker(FatcatWorker):
             self.offset = 1
 
         with topic.get_producer() as producer:
-            while True: 
+            while True:
                 latest = int(self.api.get_changelog(limit=1)[0].index)
                 if latest > self.offset:
                     print("Fetching changelogs from {} through {}".format(
@@ -101,4 +101,3 @@ class EntityUpdatesWorker(FatcatWorker):
                 timestamp=None,
             )
             #consumer.commit_offsets()
-
diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index b84341c7..57fb710c 100644
--- a/python/fatcat_tools/workers/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py
@@ -4,10 +4,11 @@
 import sys
 import csv
 import json
 import itertools
-import fatcat_client
 from itertools import islice
 from pykafka import KafkaClient
 from pykafka.common import OffsetType
+
+import fatcat_client
 from fatcat_client.rest import ApiException
 
@@ -51,4 +52,3 @@ class FatcatWorker:
         self.kafka = KafkaClient(hosts=kafka_hosts, broker_version="1.0.0")
         self.produce_topic = produce_topic
         self.consume_topic = consume_topic
-
diff --git a/python/fatcat_web/auth.py b/python/fatcat_web/auth.py
index 03964c92..1953151b 100644
--- a/python/fatcat_web/auth.py
+++ b/python/fatcat_web/auth.py
@@ -4,8 +4,8 @@
 import requests
 import pymacaroons
 from flask import Flask, render_template, send_from_directory, request, \
     url_for, abort, g, redirect, jsonify, session, flash
-from fatcat_web import login_manager, api, priv_api, Config
 from flask_login import logout_user, login_user, UserMixin
+from fatcat_web import login_manager, api, priv_api, Config
 import fatcat_client
 def handle_logout():
@@ -141,4 +141,3 @@ def load_user(editor_id):
     user.username = editor['username']
     user.token = token
     return user
-
diff --git a/python/fatcat_web/cors.py b/python/fatcat_web/cors.py
index 2285cf80..d84babd8 100644
--- a/python/fatcat_web/cors.py
+++ b/python/fatcat_web/cors.py
@@ -1,12 +1,13 @@
-from datetime import timedelta
-from flask import make_response, request, current_app
-from functools import update_wrapper
 
 """
 This snippet from: http://flask.pocoo.org/snippets/56/
 
 "Posted by Armin Ronacher on 2011-07-14"
 """
+from datetime import timedelta
+from functools import update_wrapper
+from flask import make_response, request, current_app
+
 
 def crossdomain(origin=None, methods=None, headers=None, max_age=21600,
                 attach_to_all=True,
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 11f73e4f..693adf25 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -4,12 +4,13 @@
 import json
 from flask import Flask, render_template, send_from_directory, request, \
     url_for, abort, g, redirect, jsonify, session, flash
 from flask_login import login_required
+
+from fatcat_client.rest import ApiException
+from fatcat_tools.transforms import *
 from fatcat_web import app, api, auth_api, priv_api
 from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth
-from fatcat_client.rest import ApiException
-from fatcat_web.search import *
 from fatcat_web.cors import crossdomain
-from fatcat_tools.transforms import *
+from fatcat_web.search import *
 
 ### Views ###################################################################
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index f10ce406..3b48f49e 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -1,8 +1,4 @@
-import requests
-from flask import abort, flash
-from fatcat_web import app
-
 """
 Helpers for doing elasticsearch queries (used in the web interface; not part of
 the formal API)
 
@@ -10,6 +6,10 @@ the formal API)
 TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
 """
+import requests
+from flask import abort, flash
+from fatcat_web import app
+
 
 
 def do_search(index, request, limit=30):
@@ -119,14 +119,14 @@ def get_elastic_entity_stats():
     stats = {}
 
     # 2. releases
-    # x=> total count
-    # x=> total citation records
-    # x=> total (paper, chapter, proceeding)
-    # x=> with fulltext on web
-    # x=> open access
-    # x=> not in KBART, in IA
-    #
-    # Can probably do the above with two queries:
+    # - total count
+    # - total citation records
+    # - total (paper, chapter, proceeding)
+    # - " with fulltext on web
+    # - " open access
+    # - " not in KBART, in IA
+    #
+    # Can do the above with two queries:
     # - all releases, aggregate count and sum(ref_count)
     # - in-scope works, aggregate count by (fulltext, OA, kbart/ia)
 
diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py
index f4c2f55d..d9d21c6d 100755
--- a/python/fatcat_worker.py
+++ b/python/fatcat_worker.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 
 import sys
-import raven
 import argparse
 import datetime
+import raven
+
 from fatcat_tools import public_api
 from fatcat_tools.workers import ChangelogWorker, EntityUpdatesWorker, ElasticsearchReleaseWorker
 
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
index 8f694456..72ed068c 100644
--- a/python/tests/import_matched.py
+++ b/python/tests/import_matched.py
@@ -10,7 +10,7 @@ def matched_importer(api):
     yield MatchedImporter(api)
 
 # TODO: use API to check that entities actually created...
-def test_matched_importer(matched_importer):
+def test_matched_importer_basic(matched_importer):
     with open('tests/files/example_matched.json', 'r') as f:
         JsonLinePusher(matched_importer, f).run()
 
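
The rewritten comment in fatcat_web/search.py describes computing release stats with two Elasticsearch queries: one over all releases (count plus sum of ref_count), and one over in-scope works bucketed by fulltext/OA/preservation flags. For reference, a minimal sketch of what those two queries might look like, using the requests library that module already imports; the index name, field names (ref_count, release_type, in_web, is_oa, in_ia, in_kbart), and release_type values below are illustrative assumptions, not necessarily the actual fatcat schema:

    # Sketch only: index and field names are assumptions for illustration.
    import requests

    def release_stats_sketch(es_base="http://localhost:9200", index="fatcat_release"):
        search_url = "{}/{}/_search".format(es_base, index)

        # Query 1: all releases -- total count plus sum of reference counts
        q1 = {
            "size": 0,
            "aggs": {"ref_sum": {"sum": {"field": "ref_count"}}},
        }
        resp = requests.get(search_url, json=q1)
        resp.raise_for_status()
        body = resp.json()
        total = body["hits"]["total"]  # an int on ES 6.x; an object with "value" on 7.x+
        total_refs = int(body["aggregations"]["ref_sum"]["value"])

        # Query 2: in-scope works only, bucketed by fulltext / OA / "in IA but not KBART"
        q2 = {
            "size": 0,
            "query": {"terms": {"release_type": ["article-journal", "chapter", "paper-conference"]}},
            "aggs": {
                "in_web": {"filter": {"term": {"in_web": True}}},
                "is_oa": {"filter": {"term": {"is_oa": True}}},
                "ia_only": {"filter": {"bool": {
                    "must": [{"term": {"in_ia": True}}],
                    "must_not": [{"term": {"in_kbart": True}}],
                }}},
            },
        }
        resp = requests.get(search_url, json=q2)
        resp.raise_for_status()
        aggs = resp.json()["aggregations"]

        return {
            "total": total,
            "total_refs": total_refs,
            "in_scope_with_fulltext": aggs["in_web"]["doc_count"],
            "in_scope_oa": aggs["is_oa"]["doc_count"],
            "in_scope_ia_not_kbart": aggs["ia_only"]["doc_count"],
        }

Using filter sub-aggregations keeps this to two round trips, as the comment suggests, instead of one query per stat.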