From 9c1fd7cb8e60c397fa6defef2f0dc1eacc8d8aa7 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 13 Dec 2019 17:43:27 -0800 Subject: update ingest request schema This is mostly changing ingest_type from 'file' to 'pdf', and adding 'link_source'/'link_source_id', plus some small cleanups. --- python/fatcat_web/forms.py | 14 ++++++++------ python/fatcat_web/routes.py | 8 ++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'python/fatcat_web') diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index bd4e4bbd..5539cc20 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -386,18 +386,16 @@ class SavePaperNowForm(FlaskForm): choices=release_stage_options, default='') - def to_ingest_request(self, release, actor='savepapernow-web'): + def to_ingest_request(self, release, ingest_request_source='savepapernow'): base_url = self.base_url.data ext_ids = release.ext_ids.to_dict() # by default this dict has a bunch of empty values ext_ids = dict([(k, v) for (k, v) in ext_ids.items() if v]) ingest_request = { 'ingest_type': self.ingest_type.data, - 'ingest_request_source': actor, # TODO: deprecate? - 'actor': actor, + 'ingest_request_source': ingest_request_source, 'base_url': base_url, 'fatcat': { - 'release_stage': release.release_stage, 'release_ident': release.ident, 'work_ident': release.work_id, }, @@ -405,8 +403,12 @@ class SavePaperNowForm(FlaskForm): } if self.release_stage.data: ingest_request['release_stage'] = self.release_stage.data + if release.ext_ids.doi and base_url == "https://doi.org/{}".format(release.ext_ids.doi): - ingest_request['source'] = 'doi' - ingest_request['source_id'] = release.ext_ids.doi + ingest_request['link_source'] = 'doi' + ingest_request['link_source_id'] = release.ext_ids.doi + elif release.ext_ids.arxiv and base_url == "https://arxiv.org/pdf/{}.pdf".format(release.ext_ids.arxiv): + ingest_request['link_source'] = 'arxiv' + ingest_request['link_source_id'] = release.ext_ids.arxiv return ingest_request diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index cc0af5cc..8583d255 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -648,11 +648,11 @@ def release_save(ident): if form.is_submitted(): if form.validate_on_submit(): # got a valid spn request! try to send to kafka-pixy - msg = form.to_ingest_request(release) + msg = form.to_ingest_request(release, ingest_request_source="savepapernow-web") try: kafka_pixy_produce( Config.KAFKA_SAVEPAPERNOW_TOPIC, - json.dumps(msg), + json.dumps(msg, sort_keys=True), ) except Exception as e: print(e, file=sys.stderr) @@ -666,6 +666,10 @@ def release_save(ident): form.release_stage.data = release.release_stage if release.ext_ids.doi: form.base_url.data = "https://doi.org/{}".format(release.ext_ids.doi) + elif release.ext_ids.arxiv: + form.base_url.data = "https://arxiv.org/pdf/{}.pdf".format(release.ext_ids.arxiv) + elif release.ext_ids.pmcid: + form.base_url.data = "http://europepmc.org/backend/ptpmcrender.fcgi?accid={}&blobtype=pdf".format(release.ext_ids.pmcid) return render_template('release_save.html', entity=release, form=form), 200 ### Search ################################################################## -- cgit v1.2.3