summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_web
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-12-13 17:43:27 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-12-13 18:07:53 -0800
commit: 9c1fd7cb8e60c397fa6defef2f0dc1eacc8d8aa7 (patch)
tree: c6d3f80610af009ebba799951bfe3a5c56e72694 /python/fatcat_web
parent: 4b2fc499ac77f4b929943b54a4c28b78659468e8 (diff)
download: fatcat-9c1fd7cb8e60c397fa6defef2f0dc1eacc8d8aa7.tar.gz
download: fatcat-9c1fd7cb8e60c397fa6defef2f0dc1eacc8d8aa7.zip
update ingest request schema
This is mostly changing ingest_type from 'file' to 'pdf', and adding 'link_source'/'link_source_id', plus some small cleanups.
Diffstat (limited to 'python/fatcat_web')
-rw-r--r-- python/fatcat_web/forms.py 14
-rw-r--r-- python/fatcat_web/routes.py 8
2 files changed, 14 insertions, 8 deletions
diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index bd4e4bbd..5539cc20 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -386,18 +386,16 @@ class SavePaperNowForm(FlaskForm):
choices=release_stage_options,
default='')
- def to_ingest_request(self, release, actor='savepapernow-web'):
+ def to_ingest_request(self, release, ingest_request_source='savepapernow'):
base_url = self.base_url.data
ext_ids = release.ext_ids.to_dict()
# by default this dict has a bunch of empty values
ext_ids = dict([(k, v) for (k, v) in ext_ids.items() if v])
ingest_request = {
'ingest_type': self.ingest_type.data,
- 'ingest_request_source': actor, # TODO: deprecate?
- 'actor': actor,
+ 'ingest_request_source': ingest_request_source,
'base_url': base_url,
'fatcat': {
- 'release_stage': release.release_stage,
'release_ident': release.ident,
'work_ident': release.work_id,
},
@@ -405,8 +403,12 @@ class SavePaperNowForm(FlaskForm):
}
if self.release_stage.data:
ingest_request['release_stage'] = self.release_stage.data
+
if release.ext_ids.doi and base_url == "https://doi.org/{}".format(release.ext_ids.doi):
- ingest_request['source'] = 'doi'
- ingest_request['source_id'] = release.ext_ids.doi
+ ingest_request['link_source'] = 'doi'
+ ingest_request['link_source_id'] = release.ext_ids.doi
+ elif release.ext_ids.arxiv and base_url == "https://arxiv.org/pdf/{}.pdf".format(release.ext_ids.arxiv):
+ ingest_request['link_source'] = 'arxiv'
+ ingest_request['link_source_id'] = release.ext_ids.arxiv
return ingest_request
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index cc0af5cc..8583d255 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -648,11 +648,11 @@ def release_save(ident):
if form.is_submitted():
if form.validate_on_submit():
# got a valid spn request! try to send to kafka-pixy
- msg = form.to_ingest_request(release)
+ msg = form.to_ingest_request(release, ingest_request_source="savepapernow-web")
try:
kafka_pixy_produce(
Config.KAFKA_SAVEPAPERNOW_TOPIC,
- json.dumps(msg),
+ json.dumps(msg, sort_keys=True),
)
except Exception as e:
print(e, file=sys.stderr)
@@ -666,6 +666,10 @@ def release_save(ident):
form.release_stage.data = release.release_stage
if release.ext_ids.doi:
form.base_url.data = "https://doi.org/{}".format(release.ext_ids.doi)
+ elif release.ext_ids.arxiv:
+ form.base_url.data = "https://arxiv.org/pdf/{}.pdf".format(release.ext_ids.arxiv)
+ elif release.ext_ids.pmcid:
+ form.base_url.data = "http://europepmc.org/backend/ptpmcrender.fcgi?accid={}&blobtype=pdf".format(release.ext_ids.pmcid)
return render_template('release_save.html', entity=release, form=form), 200
### Search ##################################################################