diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-09-22 22:53:50 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-09-22 22:53:50 -0700 |
commit | 7159fdf1ec55a4c9c096afb5eb1ce57b9a51f1e8 (patch) | |
tree | 0d9f0b46ae9c298d56a910434fe5a0c658d27361 /python/filter_grobid_metadata.py | |
parent | 4a5912e23ae8d58edad64931ed290779c0e1689c (diff) | |
download | sandcrawler-7159fdf1ec55a4c9c096afb5eb1ce57b9a51f1e8.tar.gz sandcrawler-7159fdf1ec55a4c9c096afb5eb1ce57b9a51f1e8.zip |
longtail grobid metadata parse/filter WIP
Diffstat (limited to 'python/filter_grobid_metadata.py')
-rwxr-xr-x | python/filter_grobid_metadata.py | 16 |
1 files changed, 16 insertions, 0 deletions
#!/usr/bin/env python3
"""Filter a stream of JSON lines, keeping only records accepted by grobid_ok().

Reads one JSON document per line from stdin and echoes each accepted line
(stripped of surrounding whitespace) to stdout.
"""

import sys
import json


def grobid_ok(obj):
    """Return True if the parsed record *obj* should be kept.

    Placeholder: every record is currently accepted (the filter is still
    work in progress).
    """
    return True


def filter_lines(lines):
    """Yield the stripped text of each line whose parsed JSON passes grobid_ok().

    Blank and whitespace-only lines are skipped instead of being handed to
    json.loads(), which would raise on them and abort the whole run (a
    trailing empty line is common in JSON-lines dumps).

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    for line in lines:
        text = line.strip()
        if not text:
            continue
        # json.loads() ignores surrounding whitespace, so parsing the stripped
        # text gives the same object as parsing the raw line.
        if grobid_ok(json.loads(text)):
            yield text


def run():
    """Filter JSON lines from stdin to stdout."""
    for text in filter_lines(sys.stdin):
        print(text)


if __name__ == "__main__":
    run()