<feed xmlns='http://www.w3.org/2005/Atom'>
<title>sandcrawler/notes, branch bnewbold-refactor-loggging</title>
<subtitle>[no description]</subtitle>
<id>https://git.bnewbold.net/sandcrawler/atom?h=bnewbold-refactor-loggging</id>
<link rel='self' href='https://git.bnewbold.net/sandcrawler/atom?h=bnewbold-refactor-loggging'/>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/'/>
<updated>2022-07-07T20:19:40+00:00</updated>
<entry>
<title>ingest: targeted 2022-04 notes</title>
<updated>2022-07-07T20:19:40+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-07-07T20:19:40+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=8f85ab294eae50e31efa9e31bb0bca1bca76cf8b'/>
<id>urn:sha1:8f85ab294eae50e31efa9e31bb0bca1bca76cf8b</id>
<content type='text'>
</content>
</entry>
<entry>
<title>finished re-GROBID-ing</title>
<updated>2022-05-04T00:15:58+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-05-04T00:15:58+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=621fca6245a5362cead33f71e83d0003aae42cf4'/>
<id>urn:sha1:621fca6245a5362cead33f71e83d0003aae42cf4</id>
<content type='text'>
</content>
</entry>
<entry>
<title>PDF URL lists update</title>
<updated>2022-05-04T00:15:18+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-05-04T00:14:08+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=ac7c44d332fcba83faae6a3e732c3415f6ab78a6'/>
<id>urn:sha1:ac7c44d332fcba83faae6a3e732c3415f6ab78a6</id>
<content type='text'>
</content>
</entry>
<entry>
<title>more dataset crawl notes</title>
<updated>2022-04-26T22:29:57+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-04-26T22:29:57+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=d3e30483fbfba5c57f86240d351de3580f5ae6fa'/>
<id>urn:sha1:d3e30483fbfba5c57f86240d351de3580f5ae6fa</id>
<content type='text'>
</content>
</entry>
<entry>
<title>.ua crawling follow-up stats</title>
<updated>2022-04-26T22:27:54+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-04-26T22:27:54+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=c0db231f1eebcf3acd78f0bf759e3df84e1d3b79'/>
<id>urn:sha1:c0db231f1eebcf3acd78f0bf759e3df84e1d3b79</id>
<content type='text'>
</content>
</entry>
<entry>
<title>start notes on unpaywall and targeted/patch crawls</title>
<updated>2022-04-20T23:32:43+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-04-20T23:32:43+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=47a064c2cc10874aed3a3de7160c92d51039a2a8'/>
<id>urn:sha1:47a064c2cc10874aed3a3de7160c92d51039a2a8</id>
<content type='text'>
</content>
</entry>
<entry>
<title>.ua ingest notes</title>
<updated>2022-04-05T00:12:00+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-04-05T00:12:00+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=e2a98a0d47f307e59e2b50f0a3945a2a4f9caaea'/>
<id>urn:sha1:e2a98a0d47f307e59e2b50f0a3945a2a4f9caaea</id>
<content type='text'>
</content>
</entry>
<entry>
<title>various ingest/task notes</title>
<updated>2022-03-22T23:03:46+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-03-22T23:03:46+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=d3638a9fd9ed11fb4484038852f8e02b2f5a7b41'/>
<id>urn:sha1:d3638a9fd9ed11fb4484038852f8e02b2f5a7b41</id>
<content type='text'>
</content>
</entry>
<entry>
<title>DOAJ ingest/crawl notes</title>
<updated>2022-03-11T20:24:19+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-03-11T20:24:19+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=257f41b174e04957aecf298b3ecdaae0ab44a1d2'/>
<id>urn:sha1:257f41b174e04957aecf298b3ecdaae0ab44a1d2</id>
<content type='text'>
</content>
</entry>
<entry>
<title>partial notes on .ua urgent crawling</title>
<updated>2022-03-11T20:05:29+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2022-03-11T20:05:29+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=9d096b26e35802553263d6472a534deb381e65da'/>
<id>urn:sha1:9d096b26e35802553263d6472a534deb381e65da</id>
<content type='text'>
</content>
</entry>
</feed>
