aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-11-04 17:18:49 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-11-04 17:19:52 -0700
commit 78485352c493c1ccd952de2143e29ae28913ee50 (patch)
tree f9cda3a548291427e6e81cccce7150960ee5a32a
parent c0da811394b9de8e30e94fa46933c72b8e5fdb19 (diff)
download sandcrawler-78485352c493c1ccd952de2143e29ae28913ee50.tar.gz
sandcrawler-78485352c493c1ccd952de2143e29ae28913ee50.zip
grobid: update 'TODO' comment based on review
-rw-r--r-- python/sandcrawler/grobid.py 3
1 files changed, 0 insertions, 3 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index a6858ff..f221830 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -83,8 +83,6 @@ class GrobidClient(object):
- status (slug)
- error_msg (if status == 'error')
- tei_xml (if status is 200)
-
- TODO: persist connection for performance?
"""
assert blob
@@ -188,7 +186,6 @@ class GrobidClient(object):
if len(ref.get("unstructured", "").strip()) <= 6:
return False
- # TODO: what other combinations are enough to skip parsing?
if (
ref.get("year")
and ref.get("author")