-rwxr-xr-x  python/fatcat_harvest.py                            2
-rw-r--r--  python/fatcat_tools/api_auth.py                     1
-rw-r--r--  python/fatcat_tools/harvest/doi_registrars.py       3
-rw-r--r--  python/fatcat_tools/harvest/harvest_common.py       1
-rw-r--r--  python/fatcat_tools/harvest/oaipmh.py              11
-rw-r--r--  python/fatcat_tools/importers/common.py             4
-rw-r--r--  python/fatcat_tools/importers/grobid_metadata.py    1
-rw-r--r--  python/fatcat_tools/importers/journal_metadata.py   3
-rw-r--r--  python/fatcat_tools/workers/changelog.py            3
-rw-r--r--  python/fatcat_tools/workers/worker_common.py        4
-rw-r--r--  python/fatcat_web/auth.py                           3
-rw-r--r--  python/fatcat_web/cors.py                           7
-rw-r--r--  python/fatcat_web/routes.py                         7
-rw-r--r--  python/fatcat_web/search.py                        24
-rwxr-xr-x  python/fatcat_worker.py                             3
-rw-r--r--  python/tests/import_matched.py                      2
16 files changed, 37 insertions, 42 deletions
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index e28c9b08..e76e1b20 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 import sys
-import raven
 import argparse
 import datetime
+import raven
 from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
     HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
     HarvestDoajJournalWorker
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index c49051f6..3c643cea 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -37,4 +37,3 @@ def authenticated_api(host_uri, token=None):
     api.auth_check()
     return api
-
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 4a0cb8db..55d85ef9 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -4,9 +4,9 @@ import sys
 import csv
 import json
 import time
-import requests
 import itertools
 import datetime
+import requests
 from pykafka import KafkaClient
 from fatcat_tools.workers import most_recent_message
@@ -182,4 +182,3 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
     def update_params(self, params, resp):
         params['page[number]'] = resp['meta']['page'] + 1
         return params
-
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index 6041a36f..f4d74be2 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -121,4 +121,3 @@ class HarvestState:
             self.update(msg.value.decode('utf-8'))
             c += 1
         print("... got {} state update messages, done".format(c))
-
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index abd917e0..923500fc 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -4,9 +4,9 @@ import sys
 import csv
 import json
 import time
-import requests
 import itertools
 import datetime
+import requests
 from pykafka import KafkaClient
 import sickle
@@ -102,7 +102,7 @@ class HarvestArxivWorker(HarvestOaiPmhWorker):
     """
     def __init__(self, **kwargs):
-        super().__init__(**kwargs) 
+        super().__init__(**kwargs)
         self.endpoint_url = "https://export.arxiv.org/oai2"
         self.metadata_prefix = "arXiv"
         self.name = "arxiv"
@@ -117,7 +117,7 @@ class HarvestPubmedWorker(HarvestOaiPmhWorker):
     """
     def __init__(self, **kwargs):
-        super().__init__(**kwargs) 
+        super().__init__(**kwargs)
         self.endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi"
         self.metadata_prefix = "pmc_fm"
         self.name = "pubmed"
@@ -132,7 +132,7 @@ class HarvestDoajJournalWorker(HarvestOaiPmhWorker):
     """
     def __init__(self, **kwargs):
-        super().__init__(**kwargs) 
+        super().__init__(**kwargs)
         self.endpoint_url = "https://www.doaj.org/oai"
         self.metadata_prefix = "oai_dc"
         self.name = "doaj-journal"
@@ -144,8 +144,7 @@ class HarvestDoajArticleWorker(HarvestOaiPmhWorker):
     """
     def __init__(self, **kwargs):
-        super().__init__(**kwargs) 
+        super().__init__(**kwargs)
         self.endpoint_url = "https://www.doaj.org/oai.article"
         self.metadata_prefix = "oai_doaj"
         self.name = "doaj-article"
-
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 32f7b4d5..b89c3828 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -111,7 +111,7 @@ def test_make_rel_url():
 class EntityImporter:
     """
     Base class for fatcat entity importers.
-    
+
     The API exposed to record iterator is:
         push_record(raw_record)
@@ -143,7 +143,7 @@ class EntityImporter:
         eg_extra['git_rev'] = eg_extra.get('git_rev',
             subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
         eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.EntityImporter')
-        
+
         self.api = api
         self.bezerk_mode = kwargs.get('bezerk_mode', False)
         self.edit_batch_size = kwargs.get('edit_batch_size', 100)
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index d693bc0a..7281a7a1 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -194,4 +194,3 @@ class GrobidMetadataImporter(EntityImporter):
             autoaccept=True,
             description=self.editgroup_description,
             extra=json.dumps(self.editgroup_extra))
-
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 75ee34d8..9bba2b12 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -118,7 +118,7 @@ class JournalMetadataImporter(EntityImporter):
         if existing:
             self.counts['exists'] += 1
             return False
-        
+
         return True
     def insert_batch(self, batch):
@@ -126,4 +126,3 @@
             autoaccept=True,
             description=self.editgroup_description,
             extra=json.dumps(self.editgroup_extra))
-
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 636ed304..39a84f18 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -34,7 +34,7 @@ class ChangelogWorker(FatcatWorker):
                 self.offset = 1
         with topic.get_producer() as producer:
-            while True: 
+            while True:
                 latest = int(self.api.get_changelog(limit=1)[0].index)
                 if latest > self.offset:
                     print("Fetching changelogs from {} through {}".format(
@@ -101,4 +101,3 @@ class EntityUpdatesWorker(FatcatWorker):
                        timestamp=None,
                    )
                #consumer.commit_offsets()
-
diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index b84341c7..57fb710c 100644
--- a/python/fatcat_tools/workers/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py
@@ -4,10 +4,11 @@ import sys
 import csv
 import json
 import itertools
-import fatcat_client
 from itertools import islice
 from pykafka import KafkaClient
 from pykafka.common import OffsetType
+
+import fatcat_client
 from fatcat_client.rest import ApiException
@@ -51,4 +52,3 @@ class FatcatWorker:
         self.kafka = KafkaClient(hosts=kafka_hosts, broker_version="1.0.0")
         self.produce_topic = produce_topic
         self.consume_topic = consume_topic
-
diff --git a/python/fatcat_web/auth.py b/python/fatcat_web/auth.py
index 03964c92..1953151b 100644
--- a/python/fatcat_web/auth.py
+++ b/python/fatcat_web/auth.py
@@ -4,8 +4,8 @@ import requests
 import pymacaroons
 from flask import Flask, render_template, send_from_directory, request, \
     url_for, abort, g, redirect, jsonify, session, flash
-from fatcat_web import login_manager, api, priv_api, Config
 from flask_login import logout_user, login_user, UserMixin
+from fatcat_web import login_manager, api, priv_api, Config
 import fatcat_client
 def handle_logout():
@@ -141,4 +141,3 @@ def load_user(editor_id):
     user.username = editor['username']
     user.token = token
     return user
-
diff --git a/python/fatcat_web/cors.py b/python/fatcat_web/cors.py
index 2285cf80..d84babd8 100644
--- a/python/fatcat_web/cors.py
+++ b/python/fatcat_web/cors.py
@@ -1,12 +1,13 @@
-from datetime import timedelta
-from flask import make_response, request, current_app
-from functools import update_wrapper
 """
 This snippet from: http://flask.pocoo.org/snippets/56/
 "Posted by Armin Ronacher on 2011-07-14"
 """
+from datetime import timedelta
+from functools import update_wrapper
+from flask import make_response, request, current_app
+
 def crossdomain(origin=None, methods=None, headers=None,
                 max_age=21600, attach_to_all=True,
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 11f73e4f..693adf25 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -4,12 +4,13 @@ import json
 from flask import Flask, render_template, send_from_directory, request, \
     url_for, abort, g, redirect, jsonify, session, flash
 from flask_login import login_required
+
+from fatcat_client.rest import ApiException
+from fatcat_tools.transforms import *
 from fatcat_web import app, api, auth_api, priv_api
 from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth
-from fatcat_client.rest import ApiException
-from fatcat_web.search import *
 from fatcat_web.cors import crossdomain
-from fatcat_tools.transforms import *
+from fatcat_web.search import *
 ### Views ###################################################################
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index f10ce406..3b48f49e 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -1,8 +1,4 @@
-import requests
-from flask import abort, flash
-from fatcat_web import app
-
 """
 Helpers for doing elasticsearch queries (used in the web interface; not part of
 the formal API)
@@ -10,6 +6,10 @@ the formal API)
 TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
 """
+import requests
+from flask import abort, flash
+from fatcat_web import app
+
 def do_search(index, request, limit=30):
@@ -119,14 +119,14 @@ def get_elastic_entity_stats():
     stats = {}
     # 2. releases
-    #  x=> total count
-    #  x=> total citation records
-    #  x=> total (paper, chapter, proceeding)
-    #  x=> with fulltext on web
-    #  x=> open access
-    #  x=> not in KBART, in IA
-    # 
-    # Can probably do the above with two queries:
+    #  - total count
+    #  - total citation records
+    #  - total (paper, chapter, proceeding)
+    #  - " with fulltext on web
+    #  - " open access
+    #  - " not in KBART, in IA
+    #
+    # Can do the above with two queries:
     #  - all releases, aggregate count and sum(ref_count)
     #  - in-scope works, aggregate count by (fulltext, OA, kbart/ia)
diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py
index f4c2f55d..d9d21c6d 100755
--- a/python/fatcat_worker.py
+++ b/python/fatcat_worker.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 import sys
-import raven
 import argparse
 import datetime
+import raven
+
 from fatcat_tools import public_api
 from fatcat_tools.workers import ChangelogWorker, EntityUpdatesWorker, ElasticsearchReleaseWorker
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
index 8f694456..72ed068c 100644
--- a/python/tests/import_matched.py
+++ b/python/tests/import_matched.py
@@ -10,7 +10,7 @@ def matched_importer(api):
     yield MatchedImporter(api)
 # TODO: use API to check that entities actually created...
-def test_matched_importer(matched_importer):
+def test_matched_importer_basic(matched_importer):
     with open('tests/files/example_matched.json', 'r') as f:
         JsonLinePusher(matched_importer, f).run()
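
The bulk of this commit is import reordering and trailing-whitespace cleanup. As a rough illustration only, the changed files converge on the usual grouping of standard-library, then third-party, then local imports; the grouping label (PEP 8 style) is an inference, not stated in the commit, and the module names below are simply taken from the files above:

    # Sketch of the import layout the diff moves toward (assumed PEP 8 grouping).

    # standard library
    import sys
    import argparse
    import datetime

    # third-party
    import raven
    import requests
    from pykafka import KafkaClient

    # local (fatcat) packages
    import fatcat_client
    from fatcat_tools import public_api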
