From ca75f7295c3f5383534b25069ec1e64e4064cef6 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Sun, 11 Oct 2020 21:42:24 -0700
Subject: OAI-PMH ingest progress timestamps

---
 notes/ingest/2020-05_oai_pmh.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'notes')

diff --git a/notes/ingest/2020-05_oai_pmh.md b/notes/ingest/2020-05_oai_pmh.md
index de9bfba..fe22c75 100644
--- a/notes/ingest/2020-05_oai_pmh.md
+++ b/notes/ingest/2020-05_oai_pmh.md
@@ -192,6 +192,19 @@ And went from about 42,826,313 rows to 31,773,874 unique URLs to crawl, so
 expecting at least 11,052,439 `no-capture` ingest results (and should probably
 filter for these or even delete from the ingest request table).
 
+Ingest progress:
+
+    2020-08-05 14:02: 32,571,018
+    2020-08-06 13:49: 31,195,169
+    2020-08-07 10:11: 29,986,169
+    2020-08-10 10:43: 26,497,196
+    2020-08-12 11:02: 23,811,845
+    2020-08-17 13:34: 19,460,502
+    2020-08-20 09:49: 15,069,507
+    2020-08-25 09:56: 9,397,035
+    2020-09-02 15:02: 305,889 (72k longest queue)
+    2020-09-03 14:30: done
+
 ## Post-ingest stats
 
     SELECT ingest_file_result.status, COUNT(*)
-- 
cgit v1.2.3