aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/fatcat_tools/harvest/pubmed.py6
-rw-r--r--python/fatcat_tools/transforms/access.py12
-rw-r--r--python/fatcat_web/forms.py12
-rw-r--r--python/fatcat_web/ref_routes.py41
-rw-r--r--python/fatcat_web/templates/reference_match.html4
-rw-r--r--python_openapi_client/README.md7
-rw-r--r--python_openapi_client/fatcat_openapi_client/__version__.py2
-rw-r--r--python_openapi_client/setup.py4
8 files changed, 69 insertions, 19 deletions
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index 579dd505..92798a99 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -263,10 +263,12 @@ def ftpretr(url, max_retries=10, retry_delay=1, proxy_hostport=None):
return f.name
-def ftpretr_via_http_proxy(url, proxy_hostport="159.69.240.245:15201", max_retries=10, retry_delay=1):
+def ftpretr_via_http_proxy(url, proxy_hostport="ftp.ncbi.nlm.nih.gov", max_retries=10, retry_delay=1):
"""
Fetch file from FTP via external HTTP proxy, e.g. ftp.host.com:/a/b/c would
- be retrievable via proxy.com/a/b/c.
+ be retrievable via proxy.com/a/b/c. (In 09/2021 we used
+ "159.69.240.245:15201" as proxy_hostport, but that started to fail on
+ 2021-10-15, so we switched to NIH's own HTTP version.)
"""
parsed = urlparse(url)
server, path = parsed.netloc, parsed.path
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index 5ed64c7c..39d4c6d3 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
"""
Extracts access options from a release.
- TODO: proper implementation
+ TODO: proper implementation and filtering, instead of just returning first
+ option found
"""
options = []
for f in (release.files or []):
+ thumbnail_url = None
+ if f.mimetype == 'application/pdf' and f.sha1 and f.urls:
+ # NOTE: scholar.archive.org does an actual database check before
+ # generating these URLs, but we skip that for speed
+ thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg"
for u in (f.urls or []):
if '://web.archive.org/' in u.url:
return [AccessOption(
@@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
access_url=u.url,
mimetype=f.mimetype,
size_bytes=f.size,
- thumbnail_url=None
+ thumbnail_url=thumbnail_url,
)]
elif '://archive.org/' in u.url:
return [AccessOption(
@@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
access_url=u.url,
mimetype=f.mimetype,
size_bytes=f.size,
- thumbnail_url=None
+ thumbnail_url=thumbnail_url,
)]
return options
diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index a856ef22..2757ebd2 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -487,6 +487,10 @@ class EntityTomlForm(EntityEditForm):
class ReferenceMatchForm(FlaskForm):
+ class Meta:
+ # this is an API, so disable CSRF
+ csrf = False
+
submit_type = SelectField('submit_type',
[validators.DataRequired()],
choices=['parse', 'match'])
@@ -496,12 +500,20 @@ class ReferenceMatchForm(FlaskForm):
title = StringField("Title")
journal = StringField("Journal or Conference")
first_author = StringField("First Author")
+ #author_names = StringField("Author Names")
#year = IntegerField('Year Released',
# [validators.Optional(True), valid_year])
year = StringField("Year Released")
+ date = StringField("Date Released")
volume = StringField("Volume")
issue = StringField("Issue")
pages = StringField("Pages")
+ publisher = StringField("Publisher")
+ doi = StringField("DOI")
+ pmid = StringField("PubMed Identifier (PMID)")
+ arxiv_id = StringField("arxiv.org Identifier")
+ release_type = StringField("Release Type")
+ release_stage = StringField("Release Stage")
@staticmethod
def from_grobid_parse(parse_dict, raw_citation):
diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
index d4219012..2d8ed413 100644
--- a/python/fatcat_web/ref_routes.py
+++ b/python/fatcat_web/ref_routes.py
@@ -3,13 +3,16 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references
"inbound" and "outbound" from a specific release or work.
"""
-from flask import render_template, request, Response
+import json
+
+from flask import render_template, request, Response, jsonify
from fatcat_openapi_client import *
from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release
from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches
from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits
from fatcat_tools.transforms.access import release_access_options
+from fatcat_tools.transforms.entities import entity_to_dict
from fatcat_web import app, api
from fatcat_web.cors import crossdomain
from fatcat_web.forms import *
@@ -92,16 +95,18 @@ def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str):
hits = _refs_web("out", wikipedia_article=wikipedia_article)
return render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", hits=hits), 200
-
@app.route('/reference/match', methods=['GET', 'POST'])
def reference_match():
- form = ReferenceMatchForm()
grobid_status = None
grobid_dict = None
- if form.is_submitted():
- if form.validate_on_submit():
+ form = ReferenceMatchForm()
+ if not form.is_submitted() and request.args.get('submit_type'):
+ form = ReferenceMatchForm(request.args)
+
+ if form.is_submitted() or request.args.get('title'):
+ if form.validate():
if form.submit_type.data == 'parse':
resp_xml = grobid_api_process_citation(form.raw_citation.data)
if not resp_xml:
@@ -166,3 +171,29 @@ def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str):
wikipedia_article = wiki_lang + ":" + wiki_article
hits = _refs_web("out", wikipedia_article=wikipedia_article)
return Response(hits.json(exclude_unset=True), mimetype="application/json")
+
+
+@app.route('/reference/match.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def reference_match_json():
+ form = ReferenceMatchForm(request.args)
+ if form.validate():
+ if form.submit_type.data == 'match':
+ matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
+ else:
+ raise NotImplementedError()
+ resp = []
+ for m in matches:
+ # expand releases more completely
+ m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs")
+ # hack in access options
+ m.access_options = release_access_options(m.release)
+
+ # and manually convert to dict (for jsonify)
+ info = m.__dict__
+ info['release'] = entity_to_dict(m.release)
+ info['access_options'] = [o.dict() for o in m.access_options]
+ resp.append(info)
+ return jsonify(resp), 200
+ else:
+ return Response(json.dumps(dict(errors=form.errors)), mimetype="application/json", status=400)
diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html
index f2335f52..08ab33fc 100644
--- a/python/fatcat_web/templates/reference_match.html
+++ b/python/fatcat_web/templates/reference_match.html
@@ -83,7 +83,9 @@
<td class="">
{% if match.access_options %}
<a href="{{ match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a>
- {% endif %}
+ {% else %}
+ <i class="ui tiny grey inactive button">no fulltext</i>
+ {% endif %}
{% endfor %}
</tbody>
</table>
diff --git a/python_openapi_client/README.md b/python_openapi_client/README.md
index 43c9e9c5..8cc34147 100644
--- a/python_openapi_client/README.md
+++ b/python_openapi_client/README.md
@@ -3,14 +3,14 @@ Fatcat is a scalable, versioned, API-oriented catalog of bibliographic entities
This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:
-- API version: 0.3.3
-- Package version: 0.3.3
+- API version: 0.4.0
+- Package version: 0.4.0
- Build package: org.openapitools.codegen.languages.PythonClientCodegen
For more information, please visit [https://fatcat.wiki](https://fatcat.wiki)
## Requirements.
-Python 2.7 and 3.5+
+Python 3.5+
## Installation & Usage
### pip install
@@ -162,6 +162,7 @@ Class | Method | HTTP request | Description
*DefaultApi* | [**get_work_revision**](docs/DefaultApi.md#get_work_revision) | **GET** /work/rev/{rev_id} |
*DefaultApi* | [**lookup_container**](docs/DefaultApi.md#lookup_container) | **GET** /container/lookup |
*DefaultApi* | [**lookup_creator**](docs/DefaultApi.md#lookup_creator) | **GET** /creator/lookup |
+*DefaultApi* | [**lookup_editor**](docs/DefaultApi.md#lookup_editor) | **GET** /editor/lookup |
*DefaultApi* | [**lookup_file**](docs/DefaultApi.md#lookup_file) | **GET** /file/lookup |
*DefaultApi* | [**lookup_release**](docs/DefaultApi.md#lookup_release) | **GET** /release/lookup |
*DefaultApi* | [**update_container**](docs/DefaultApi.md#update_container) | **PUT** /editgroup/{editgroup_id}/container/{ident} |
diff --git a/python_openapi_client/fatcat_openapi_client/__version__.py b/python_openapi_client/fatcat_openapi_client/__version__.py
index bd528324..618922fe 100644
--- a/python_openapi_client/fatcat_openapi_client/__version__.py
+++ b/python_openapi_client/fatcat_openapi_client/__version__.py
@@ -1,3 +1,3 @@
-VERSION = (0, 3, 3) # eg, (0, 2, '0dev0')
+VERSION = (0, 4, 0) # eg, (0, 2, '0dev0')
__version__ = '.'.join(map(str, VERSION))
diff --git a/python_openapi_client/setup.py b/python_openapi_client/setup.py
index 53b8e9de..bef29376 100644
--- a/python_openapi_client/setup.py
+++ b/python_openapi_client/setup.py
@@ -91,10 +91,6 @@ class UploadCommand(Command):
self.status('Uploading the package to PyPI via Twine…')
os.system('twine upload dist/*')
- self.status('Pushing git tags…')
- os.system('git tag v{0}'.format(about['__version__']))
- os.system('git push --tags')
-
sys.exit()