<feed xmlns='http://www.w3.org/2005/Atom'>
<title>sandcrawler/notes/tasks, branch bnewbold-refactor-loggging</title>
<subtitle>[no description]</subtitle>
<id>https://git.bnewbold.net/sandcrawler/atom?h=bnewbold-refactor-loggging</id>
<link rel='self' href='https://git.bnewbold.net/sandcrawler/atom?h=bnewbold-refactor-loggging'/>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/'/>
<updated>2022-05-04T00:15:58+00:00</updated>
<entry>
<title>finished re-GROBID-ing</title>
<updated>2022-05-04T00:15:58+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-05-04T00:15:58+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=621fca6245a5362cead33f71e83d0003aae42cf4'/>
<id>urn:sha1:621fca6245a5362cead33f71e83d0003aae42cf4</id>
<content type='text'>
</content>
</entry>
<entry>
<title>PDF URL lists update</title>
<updated>2022-05-04T00:15:18+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-05-04T00:14:08+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=ac7c44d332fcba83faae6a3e732c3415f6ab78a6'/>
<id>urn:sha1:ac7c44d332fcba83faae6a3e732c3415f6ab78a6</id>
<content type='text'>
</content>
</entry>
<entry>
<title>.ua crawling follow-up stats</title>
<updated>2022-04-26T22:27:54+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-04-26T22:27:54+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=c0db231f1eebcf3acd78f0bf759e3df84e1d3b79'/>
<id>urn:sha1:c0db231f1eebcf3acd78f0bf759e3df84e1d3b79</id>
<content type='text'>
</content>
</entry>
<entry>
<title>.ua ingest notes</title>
<updated>2022-04-05T00:12:00+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-04-05T00:12:00+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=e2a98a0d47f307e59e2b50f0a3945a2a4f9caaea'/>
<id>urn:sha1:e2a98a0d47f307e59e2b50f0a3945a2a4f9caaea</id>
<content type='text'>
</content>
</entry>
<entry>
<title>various ingest/task notes</title>
<updated>2022-03-22T23:03:46+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-03-22T23:03:46+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=d3638a9fd9ed11fb4484038852f8e02b2f5a7b41'/>
<id>urn:sha1:d3638a9fd9ed11fb4484038852f8e02b2f5a7b41</id>
<content type='text'>
</content>
</entry>
<entry>
<title>partial notes on .ua urgent crawling</title>
<updated>2022-03-11T20:05:29+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-03-11T20:05:29+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=9d096b26e35802553263d6472a534deb381e65da'/>
<id>urn:sha1:9d096b26e35802553263d6472a534deb381e65da</id>
<content type='text'>
</content>
</entry>
<entry>
<title>enqueue PLATFORM PDFs for crawl</title>
<updated>2022-01-08T02:02:09+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-01-08T02:02:09+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=4e8407758618bece136addffe301ba8357366de3'/>
<id>urn:sha1:4e8407758618bece136addffe301ba8357366de3</id>
<content type='text'>
</content>
</entry>
<entry>
<title>document progress on re-GROBID-ing</title>
<updated>2022-01-05T19:54:49+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-01-05T19:54:49+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=65a0d38bcedca0610ca6fa8e053199f324062ace'/>
<id>urn:sha1:65a0d38bcedca0610ca6fa8e053199f324062ace</id>
<content type='text'>
</content>
</entry>
<entry>
<title>notes on re-GROBID-ing (and re-extracting) some files</title>
<updated>2021-12-09T22:12:18+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-12-09T22:12:18+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=e5c021bfeb03c50924160616dc64d44617d45933'/>
<id>urn:sha1:e5c021bfeb03c50924160616dc64d44617d45933</id>
<content type='text'>
</content>
</entry>
<entry>
<title>wrap up crossref refs backfill notes</title>
<updated>2021-11-11T01:25:34+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-11-11T01:25:34+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=bdccd79d741cab89cd28202a352044ed55624503'/>
<id>urn:sha1:bdccd79d741cab89cd28202a352044ed55624503</id>
<content type='text'>
</content>
</entry>
</feed>
