<feed xmlns='http://www.w3.org/2005/Atom'>
<title>sandcrawler/notes/ingest, branch trawler</title>
<subtitle>[no description]</subtitle>
<id>https://git.bnewbold.net/sandcrawler/atom?h=trawler</id>
<link rel='self' href='https://git.bnewbold.net/sandcrawler/atom?h=trawler'/>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/'/>
<updated>2021-12-02T03:06:00+00:00</updated>
<entry>
<title>commit old patch crawl notes</title>
<updated>2021-12-02T03:06:00+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-12-02T03:06:00+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=85a9c9008ab66680047fb151996c55566d56cbe3'/>
<id>urn:sha1:85a9c9008ab66680047fb151996c55566d56cbe3</id>
<content type='text'>
</content>
</entry>
<entry>
<title>daily OA crawl improvements/notes</title>
<updated>2021-09-08T19:16:44+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-09-08T19:16:44+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=ce25d59845083ca0beab98144b0c43bfc4254d6d'/>
<id>urn:sha1:ce25d59845083ca0beab98144b0c43bfc4254d6d</id>
<content type='text'>
</content>
</entry>
<entry>
<title>OAI-PMH patch and ingest improvement notes</title>
<updated>2021-09-04T01:34:33+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-09-04T01:34:33+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=d749a7a6a1c1d439596c5d053daf904638b4dbc2'/>
<id>urn:sha1:d749a7a6a1c1d439596c5d053daf904638b4dbc2</id>
<content type='text'>
</content>
</entry>
<entry>
<title>commit old patch crawl notes (dec 2020)</title>
<updated>2021-09-04T01:33:23+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-09-04T01:33:23+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=c147ad35fa8ec59b8c015f8badae67c525f65253'/>
<id>urn:sha1:c147ad35fa8ec59b8c015f8badae67c525f65253</id>
<content type='text'>
</content>
</entry>
<entry>
<title>commit old arxiv ingest notes</title>
<updated>2021-09-04T01:32:39+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-09-04T01:32:39+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=c3cbab57fc5b27a5add399dd27dff0a91c9d9fa1'/>
<id>urn:sha1:c3cbab57fc5b27a5add399dd27dff0a91c9d9fa1</id>
<content type='text'>
</content>
</entry>
<entry>
<title>commit old patch notes (will rework)</title>
<updated>2021-09-03T17:36:49+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-09-03T16:04:55+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=f074a6aafd9af06866829d35555afe10286126fb'/>
<id>urn:sha1:f074a6aafd9af06866829d35555afe10286126fb</id>
<content type='text'>
</content>
</entry>
<entry>
<title>MAG post-crawl stats (5m+ new PDFs crawled successfully)</title>
<updated>2021-09-02T23:31:23+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-09-02T23:31:23+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=ffd6cd86bb8a4756d123decaa5f2ef03428f208f'/>
<id>urn:sha1:ffd6cd86bb8a4756d123decaa5f2ef03428f208f</id>
<content type='text'>
</content>
</entry>
<entry>
<title>MAG and OAI-PMH crawl/processing notes</title>
<updated>2021-08-13T20:57:57+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-08-13T20:57:57+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=eab9b929a05da3fa25f4bfaffd84bb0d7b219c73'/>
<id>urn:sha1:eab9b929a05da3fa25f4bfaffd84bb0d7b219c73</id>
<content type='text'>
</content>
</entry>
<entry>
<title>2021-07 unpaywall crawl wrap-up notes</title>
<updated>2021-07-30T16:33:24+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-07-30T16:33:24+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=ccb2c72c170d6736af675734906c8957ee176a8b'/>
<id>urn:sha1:ccb2c72c170d6736af675734906c8957ee176a8b</id>
<content type='text'>
</content>
</entry>
<entry>
<title>unpaywall 2021-07 crawl partial notes</title>
<updated>2021-07-14T22:45:53+00:00</updated>
<author>
<name>Bryan Newbold</name>
<email>bnewbold@archive.org</email>
</author>
<published>2021-07-14T22:45:53+00:00</published>
<link rel='alternate' type='text/html' href='https://git.bnewbold.net/sandcrawler/commit/?id=23374f2d10914e06c67e7c0c1f9c37ba98e36eeb'/>
<id>urn:sha1:23374f2d10914e06c67e7c0c1f9c37ba98e36eeb</id>
<content type='text'>
</content>
</entry>
</feed>
