diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-04-15 23:31:07 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-07-23 10:55:09 -0700 |
commit | 314aba35d06eb80be0c5ffc068774bb9bca38e76 (patch) | |
tree | 1f780f3e2e1808247cffc167f73eba3272353df7 /python/fatcat_web/forms.py | |
parent | 15680e0caae7ff6e24ddca8584b0c590d2b30581 (diff) | |
download | fatcat-314aba35d06eb80be0c5ffc068774bb9bca38e76.tar.gz fatcat-314aba35d06eb80be0c5ffc068774bb9bca38e76.zip |
web: initial implementation of fuzzy citation parsing and matching tool
Diffstat (limited to 'python/fatcat_web/forms.py')
-rw-r--r-- | python/fatcat_web/forms.py | 41 |
1 files changed, 41 insertions, 0 deletions
# Recovered from the diff of python/fatcat_web/forms.py
# (index 1c9fb199..19176a59, hunk @@ -482,3 +482,44 @@). The class is appended
# to the module directly after the end of EntityTomlForm, whose last lines are:
#     etf.toml.data = entity_to_toml(entity, pop_fields=pop_fields)
#     return etf

class ReferenceMatchForm(FlaskForm):
    """
    Form for the citation parsing and matching tool.

    Carries a raw citation string plus individual bibliographic fields
    (title, journal, first author, year, volume, issue, pages).
    `submit_type` is required and restricted to the choices 'parse' and
    'match'.
    """

    submit_type = SelectField('submit_type',
        [validators.DataRequired()],
        choices=['parse', 'match'])

    raw_citation = TextAreaField("Citation String", render_kw={'rows':'3'})

    title = StringField("Title")
    journal = StringField("Journal or Conference")
    first_author = StringField("First Author")
    #year = IntegerField('Year Released',
    #    [validators.Optional(True), valid_year])
    year = StringField("Year Released")
    volume = StringField("Volume")
    issue = StringField("Issue")
    pages = StringField("Pages")

    @staticmethod
    def from_grobid_parse(parse_dict, raw_citation):
        """
        Initializes form from GROBID extraction

        parse_dict: mapping of parsed citation fields. Keys read here are
            'title', 'journal', 'volume', 'issue', 'pages', 'date' and
            'authors'. Only the first entry of 'authors' is used, via its
            'name' key (presumably a list of dicts; confirm against caller).
        raw_citation: the original citation string, stored on the form as-is.

        Returns a new ReferenceMatchForm. Fields whose parsed value is missing
        or falsy are left untouched.
        """
        rmf = ReferenceMatchForm()
        rmf.raw_citation.data = raw_citation

        # These form fields share their name with the parse_dict key.
        for name in ('title', 'journal', 'volume', 'issue', 'pages'):
            value = parse_dict.get(name)
            if value:
                getattr(rmf, name).data = value

        # Only the leading four characters of the date are used, as the year.
        date = parse_dict.get('date')
        if date and len(date) >= 4 and date[0:4].isdigit():
            try:
                rmf.year.data = int(date[0:4])
            except ValueError:
                # str.isdigit() also accepts characters such as superscript
                # digits, which int() rejects; leave the year unset rather
                # than fail the whole parse.
                pass

        authors = parse_dict.get('authors')
        if authors:
            rmf.first_author.data = authors[0].get('name')

        return rmf