about summary refs log tree commit diff stats
path: root/python/filter_grobid_metadata.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-09-22 22:53:50 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-09-22 22:53:50 -0700
commit 7159fdf1ec55a4c9c096afb5eb1ce57b9a51f1e8 (patch)
tree 0d9f0b46ae9c298d56a910434fe5a0c658d27361 /python/filter_grobid_metadata.py
parent 4a5912e23ae8d58edad64931ed290779c0e1689c (diff)
download sandcrawler-7159fdf1ec55a4c9c096afb5eb1ce57b9a51f1e8.tar.gz
sandcrawler-7159fdf1ec55a4c9c096afb5eb1ce57b9a51f1e8.zip
longtail grobid metadata parse/filter WIP
Diffstat (limited to 'python/filter_grobid_metadata.py')
-rwxr-xr-x python/filter_grobid_metadata.py 16
1 files changed, 16 insertions, 0 deletions
diff --git a/python/filter_grobid_metadata.py b/python/filter_grobid_metadata.py
new file mode 100755
index 0000000..7f619db
--- /dev/null
+++ b/python/filter_grobid_metadata.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+
+import sys
+import json
+
+def grobid_ok(obj):
+    """Decide whether a decoded record should be kept by the filter.
+
+    Placeholder (WIP): no filtering rules are implemented yet, so every
+    record is accepted regardless of its content.
+
+    Args:
+        obj: the value decoded from one line of JSON input.
+
+    Returns:
+        Always True.
+    """
+    return True
+
+def run():
+    """Filter JSON lines from stdin, echoing the accepted ones to stdout.
+
+    Each non-blank input line is parsed as JSON and passed to grobid_ok();
+    when that returns a truthy value the line is printed with surrounding
+    whitespace stripped. Blank lines are skipped.
+
+    Raises:
+        json.JSONDecodeError: if a non-blank line is not valid JSON.
+    """
+    for line in sys.stdin:
+        record = line.strip()
+        # A blank line (e.g. a stray empty line at the end of the input) is
+        # not a JSON document; skip it rather than abort the whole stream.
+        if not record:
+            continue
+        obj = json.loads(record)
+        if grobid_ok(obj):
+            print(record)
+
+# Only start filtering when executed as a script, not when imported.
+if __name__ == "__main__":
+    run()