diff options
-rw-r--r-- | python/fatcat_tools/harvest/pubmed.py | 6 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/access.py | 12 | ||||
-rw-r--r-- | python/fatcat_web/forms.py | 12 | ||||
-rw-r--r-- | python/fatcat_web/ref_routes.py | 41 | ||||
-rw-r--r-- | python/fatcat_web/templates/reference_match.html | 4 | ||||
-rw-r--r-- | python_openapi_client/README.md | 7 | ||||
-rw-r--r-- | python_openapi_client/fatcat_openapi_client/__version__.py | 2 | ||||
-rw-r--r-- | python_openapi_client/setup.py | 4 |
8 files changed, 69 insertions, 19 deletions
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py index 579dd505..92798a99 100644 --- a/python/fatcat_tools/harvest/pubmed.py +++ b/python/fatcat_tools/harvest/pubmed.py @@ -263,10 +263,12 @@ def ftpretr(url, max_retries=10, retry_delay=1, proxy_hostport=None): return f.name -def ftpretr_via_http_proxy(url, proxy_hostport="159.69.240.245:15201", max_retries=10, retry_delay=1): +def ftpretr_via_http_proxy(url, proxy_hostport="ftp.ncbi.nlm.nih.gov", max_retries=10, retry_delay=1): """ Fetch file from FTP via external HTTP proxy, e.g. ftp.host.com:/a/b/c would - be retrievable via proxy.com/a/b/c. + be retrievable via proxy.com/a/b/c; (in 09/2021 we used + "159.69.240.245:15201" as proxy_hostport but that started to fail + 2021-10-15; just switch to NIH's http version). """ parsed = urlparse(url) server, path = parsed.netloc, parsed.path diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py index 5ed64c7c..39d4c6d3 100644 --- a/python/fatcat_tools/transforms/access.py +++ b/python/fatcat_tools/transforms/access.py @@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]: """ Extracts access options from a release. - TODO: proper implementation + TODO: proper implementation and filtering, instead of just returning first + option found """ options = [] for f in (release.files or []): + thumbnail_url = None + if f.mimetype == 'application/pdf' and f.sha1 and f.urls: + # NOTE: scholar.archive.org does an actual database check before + # generating these URLs, but we skip that for speed + thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg" for u in (f.urls or []): if '://web.archive.org/' in u.url: return [AccessOption( @@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]: access_url=u.url, mimetype=f.mimetype, size_bytes=f.size, - thumbnail_url=None + thumbnail_url=thumbnail_url, )] elif '://archive.org/' in u.url: return [AccessOption( @@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]: access_url=u.url, mimetype=f.mimetype, size_bytes=f.size, - thumbnail_url=None + thumbnail_url=thumbnail_url, )] return options diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index a856ef22..2757ebd2 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -487,6 +487,10 @@ class EntityTomlForm(EntityEditForm): class ReferenceMatchForm(FlaskForm): + class Meta: + # this is an API, so disable CSRF + csrf = False + submit_type = SelectField('submit_type', [validators.DataRequired()], choices=['parse', 'match']) @@ -496,12 +500,20 @@ class ReferenceMatchForm(FlaskForm): title = StringField("Title") journal = StringField("Journal or Conference") first_author = StringField("First Author") + #author_names = StringField("Author Names") #year = IntegerField('Year Released', # [validators.Optional(True), valid_year]) year = StringField("Year Released") + date = StringField("Date Released") volume = StringField("Volume") issue = StringField("Issue") pages = StringField("Pages") + publisher = StringField("Publisher") + doi = StringField("DOI") + pmid = StringField("PubMed Identifier (PMID)") + arxiv_id = StringField("arxiv.org Identifier") + release_type = StringField("Release Type") + release_stage = StringField("Release Stage") @staticmethod def from_grobid_parse(parse_dict, raw_citation): diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py index d4219012..2d8ed413 100644 --- a/python/fatcat_web/ref_routes.py +++ b/python/fatcat_web/ref_routes.py @@ -3,13 +3,16 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references "inbound" and "outbound" from a specific release or work. """ -from flask import render_template, request, Response +import json + +from flask import render_template, request, Response, jsonify from fatcat_openapi_client import * from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits from fatcat_tools.transforms.access import release_access_options +from fatcat_tools.transforms.entities import entity_to_dict from fatcat_web import app, api from fatcat_web.cors import crossdomain from fatcat_web.forms import * @@ -92,16 +95,18 @@ def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str): hits = _refs_web("out", wikipedia_article=wikipedia_article) return render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", hits=hits), 200 - @app.route('/reference/match', methods=['GET', 'POST']) def reference_match(): - form = ReferenceMatchForm() grobid_status = None grobid_dict = None - if form.is_submitted(): - if form.validate_on_submit(): + form = ReferenceMatchForm() + if not form.is_submitted() and request.args.get('submit_type'): + form = ReferenceMatchForm(request.args) + + if form.is_submitted() or request.args.get('title'): + if form.validate(): if form.submit_type.data == 'parse': resp_xml = grobid_api_process_citation(form.raw_citation.data) if not resp_xml: @@ -166,3 +171,29 @@ def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str): wikipedia_article = wiki_lang + ":" + wiki_article hits = _refs_web("out", wikipedia_article=wikipedia_article) return Response(hits.json(exclude_unset=True), mimetype="application/json") + + +@app.route('/reference/match.json', methods=['GET', 'OPTIONS']) +@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type']) +def reference_match_json(): + form = ReferenceMatchForm(request.args) + if form.validate(): + if form.submit_type.data == 'match': + matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or [] + else: + raise NotImplementedError() + resp = [] + for m in matches: + # expand releases more completely + m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs") + # hack in access options + m.access_options = release_access_options(m.release) + + # and manually convert to dict (for jsonify) + info = m.__dict__ + info['release'] = entity_to_dict(m.release) + info['access_options'] = [o.dict() for o in m.access_options] + resp.append(info) + return jsonify(resp), 200 + else: + return Response(json.dumps(dict(errors=form.errors)), mimetype="application/json", status=400) diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html index f2335f52..08ab33fc 100644 --- a/python/fatcat_web/templates/reference_match.html +++ b/python/fatcat_web/templates/reference_match.html @@ -83,7 +83,9 @@ <td class=""> {% if match.access_options %} <a href="{{ match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a> - {% endif %} + {% else %} + <i class="ui tiny grey inactive button">no fulltext</a> + {% endif %} {% endfor %} </tbody> </table> diff --git a/python_openapi_client/README.md b/python_openapi_client/README.md index 43c9e9c5..8cc34147 100644 --- a/python_openapi_client/README.md +++ b/python_openapi_client/README.md @@ -3,14 +3,14 @@ Fatcat is a scalable, versioned, API-oriented catalog of bibliographic entities This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 0.3.3 -- Package version: 0.3.3 +- API version: 0.4.0 +- Package version: 0.4.0 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://fatcat.wiki](https://fatcat.wiki) ## Requirements. -Python 2.7 and 3.5+ +Python 3.5+ ## Installation & Usage ### pip install @@ -162,6 +162,7 @@ Class | Method | HTTP request | Description *DefaultApi* | [**get_work_revision**](docs/DefaultApi.md#get_work_revision) | **GET** /work/rev/{rev_id} | *DefaultApi* | [**lookup_container**](docs/DefaultApi.md#lookup_container) | **GET** /container/lookup | *DefaultApi* | [**lookup_creator**](docs/DefaultApi.md#lookup_creator) | **GET** /creator/lookup | +*DefaultApi* | [**lookup_editor**](docs/DefaultApi.md#lookup_editor) | **GET** /editor/lookup | *DefaultApi* | [**lookup_file**](docs/DefaultApi.md#lookup_file) | **GET** /file/lookup | *DefaultApi* | [**lookup_release**](docs/DefaultApi.md#lookup_release) | **GET** /release/lookup | *DefaultApi* | [**update_container**](docs/DefaultApi.md#update_container) | **PUT** /editgroup/{editgroup_id}/container/{ident} | diff --git a/python_openapi_client/fatcat_openapi_client/__version__.py b/python_openapi_client/fatcat_openapi_client/__version__.py index bd528324..618922fe 100644 --- a/python_openapi_client/fatcat_openapi_client/__version__.py +++ b/python_openapi_client/fatcat_openapi_client/__version__.py @@ -1,3 +1,3 @@ -VERSION = (0, 3, 3) # eg, (0, 2, '0dev0') +VERSION = (0, 4, 0) # eg, (0, 2, '0dev0') __version__ = '.'.join(map(str, VERSION)) diff --git a/python_openapi_client/setup.py b/python_openapi_client/setup.py index 53b8e9de..bef29376 100644 --- a/python_openapi_client/setup.py +++ b/python_openapi_client/setup.py @@ -91,10 +91,6 @@ class UploadCommand(Command): self.status('Uploading the package to PyPI via Twine…') os.system('twine upload dist/*') - self.status('Pushing git tags…') - os.system('git tag v{0}'.format(about['__version__'])) - os.system('git push --tags') - sys.exit() |