<feed xmlns='http://www.w3.org/2005/Atom'>
<title>sandcrawler/notes, branch bnewbold-persist-grobid-errors</title>
<subtitle>[no description]</subtitle>
<id>https://git.bnewbold.net/sandcrawler/atom?h=bnewbold-persist-grobid-errors</id>
<link rel='self' href='https://git.bnewbold.net/sandcrawler/atom?h=bnewbold-persist-grobid-errors'/>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/'/>
<updated>2019-12-27T20:36:01+00:00</updated>
<entry>
<title>hadoop job log rename and update</title>
<updated>2019-12-27T20:36:01+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-12-27T20:36:01+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=3aa70adb3380e82a0a6964baa9058a41d8a2d454'/>
<id>urn:sha1:3aa70adb3380e82a0a6964baa9058a41d8a2d454</id>
<content type='text'>
</content>
</entry>
<entry>
<title>update job log with pig runs</title>
<updated>2019-12-27T05:35:36+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-12-27T05:35:36+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=172c426c4aa3fc3722813e32c08ee557c9b9d0cd'/>
<id>urn:sha1:172c426c4aa3fc3722813e32c08ee557c9b9d0cd</id>
<content type='text'>
</content>
</entry>
<entry>
<title>updated re-GROBID job log entry</title>
<updated>2019-11-15T23:53:28+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-11-15T23:53:28+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=e28125db2735b53e28ab5148379cb8b804c184c6'/>
<id>urn:sha1:e28125db2735b53e28ab5148379cb8b804c184c6</id>
<content type='text'>
</content>
</entry>
<entry>
<title>ingest/backfill notes</title>
<updated>2019-11-14T00:46:09+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-11-14T00:46:09+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=47e0b699b2a350b0081a64ebbcaba991c53cfb52'/>
<id>urn:sha1:47e0b699b2a350b0081a64ebbcaba991c53cfb52</id>
<content type='text'>
</content>
</entry>
<entry>
<title>notes about running 'regrobid' batches manually (not kafka)</title>
<updated>2019-11-14T00:45:04+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-11-14T00:45:04+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=885bff50bbe57322ad32f4fbfab8d846e54671f2'/>
<id>urn:sha1:885bff50bbe57322ad32f4fbfab8d846e54671f2</id>
<content type='text'>
</content>
</entry>
<entry>
<title>commit old notes about munging GROBID output</title>
<updated>2019-11-14T00:44:42+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-11-14T00:44:42+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=0e971f17f0f6d377a341825f141338b3a1e0df56'/>
<id>urn:sha1:0e971f17f0f6d377a341825f141338b3a1e0df56</id>
<content type='text'>
</content>
</entry>
<entry>
<title>old groupworks job log</title>
<updated>2019-09-21T03:05:46+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-09-21T03:05:46+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=edfc7329090b3bd84050c5efb293ebb4bdc73bf0'/>
<id>urn:sha1:edfc7329090b3bd84050c5efb293ebb4bdc73bf0</id>
<content type='text'>
</content>
</entry>
<entry>
<title>petabox journal files ingest updates</title>
<updated>2019-06-20T22:38:11+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-06-20T22:38:11+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=e0d9aaeedc9e8b9d791a72fc8e91a4869078d6f2'/>
<id>urn:sha1:e0d9aaeedc9e8b9d791a72fc8e91a4869078d6f2</id>
<content type='text'>
</content>
</entry>
<entry>
<title>clearer CDX munge notes</title>
<updated>2019-05-10T00:47:58+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-05-10T00:47:58+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=9d518593633fac490b47f67544787454dc69f1bf'/>
<id>urn:sha1:9d518593633fac490b47f67544787454dc69f1bf</id>
<content type='text'>
</content>
</entry>
<entry>
<title>give sort way more RAM by default</title>
<updated>2019-02-01T23:13:32+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2019-02-01T23:13:32+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=52967e05d2c8febdaa0426634fa987eaf5f58577'/>
<id>urn:sha1:52967e05d2c8febdaa0426634fa987eaf5f58577</id>
<content type='text'>
</content>
</entry>
</feed>
