aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/test_misc.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2019-09-26 12:00:01 -0700
committerBryan Newbold <bnewbold@archive.org>2019-09-26 12:00:01 -0700
commit37bf997dc0220a30605249655056e90f04e33366 (patch)
tree3f6a3586462d25c02b5fd219b0c754aef2976e3c /python/tests/test_misc.py
parentc3c5a6ef57e83ff4395f9f87e7e372c6c371e4a5 (diff)
downloadsandcrawler-37bf997dc0220a30605249655056e90f04e33366.tar.gz
sandcrawler-37bf997dc0220a30605249655056e90f04e33366.zip
lots of grobid tool implementation (still WIP)
Diffstat (limited to 'python/tests/test_misc.py')
-rw-r--r--python/tests/test_misc.py6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py
index 02deec9..420bc07 100644
--- a/python/tests/test_misc.py
+++ b/python/tests/test_misc.py
@@ -50,9 +50,9 @@ def test_parse_cdx_line():
'surt': "edu,upenn,ldc)/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf",
'url': "https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf",
'datetime': "20170828233154",
- 'warc': "SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828231135742-00000-00009-wbgrp-svc284/SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828232253025-00005-3480~wbgrp-svc284.us.archive.org~8443.warc.gz",
- 'offset': 931661233,
- 'c_size': 210251,
+ 'warc_path': "SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828231135742-00000-00009-wbgrp-svc284/SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828232253025-00005-3480~wbgrp-svc284.us.archive.org~8443.warc.gz",
+ 'warc_offset': 931661233,
+ 'warc_csize': 210251,
'http_status': 200,
}