diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-11-04 17:18:49 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-11-04 17:19:52 -0700 |
commit | 78485352c493c1ccd952de2143e29ae28913ee50 (patch) | |
tree | f9cda3a548291427e6e81cccce7150960ee5a32a /python | |
parent | c0da811394b9de8e30e94fa46933c72b8e5fdb19 (diff) | |
download | sandcrawler-78485352c493c1ccd952de2143e29ae28913ee50.tar.gz sandcrawler-78485352c493c1ccd952de2143e29ae28913ee50.zip |
grobid: update 'TODO' comment based on review
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/grobid.py | 3 |
1 files changed, 0 insertions, 3 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index a6858ff..f221830 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -83,8 +83,6 @@ class GrobidClient(object):
             - status (slug)
             - error_msg (if status == 'error')
             - tei_xml (if status is 200)
-
-        TODO: persist connection for performance?
         """
         assert blob
 
@@ -188,7 +186,6 @@ class GrobidClient(object):
         if len(ref.get("unstructured", "").strip()) <= 6:
             return False
 
-        # TODO: what other combinations are enough to skip parsing?
         if (
             ref.get("year")
             and ref.get("author")