From 78caa0d7772375903194e79df16d70d831ebd432 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 4 Apr 2018 11:52:55 -0700 Subject: pep8 --- extraction/extraction_cdx_grobid.py | 4 ++-- extraction/grobid2json.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extraction/extraction_cdx_grobid.py b/extraction/extraction_cdx_grobid.py index 54d8b71..27668ea 100755 --- a/extraction/extraction_cdx_grobid.py +++ b/extraction/extraction_cdx_grobid.py @@ -147,7 +147,7 @@ class MRExtractCdxGrobid(MrJob): # Fetch data from WARCs in petabox try: - rstore = ResourceStore(loaderfactory=CDXLoaderFactory()) + rstore = ResourceStore(loaderfactory=CDXLoaderFactory()) gwb_record = rstore.load_resource( info['file:cdx']['warc'], info['file:cdx']['offset'], @@ -219,7 +219,7 @@ class MRExtractCdxGrobid(MrJob): if info is None: self.increment_counter('lines', status['status']) return _, status - + # Decide what to bother inserting back into HBase # Particularly: ('f:c', 'file:mime', 'file:size', 'file:cdx') grobid_status = info.get('grobid0:status_code', None) diff --git a/extraction/grobid2json.py b/extraction/grobid2json.py index 874ac8b..daf9387 100755 --- a/extraction/grobid2json.py +++ b/extraction/grobid2json.py @@ -107,7 +107,7 @@ def do_tei(path, encumbered=True): el = tei.find('.//{%s}back/{%s}div[@type="annex"]' % (ns, ns)) info['annex'] = (el or None) and " ".join(el.itertext()).strip() - return info + return info def main(): parser = argparse.ArgumentParser( -- cgit v1.2.3