From c37e552d2a05844d1bb84ae0b55b467fb9429229 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 1 Jul 2020 16:36:16 -0700 Subject: commit old example notes --- notes/cleanup_tasks.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 notes/cleanup_tasks.txt (limited to 'notes/cleanup_tasks.txt') diff --git a/notes/cleanup_tasks.txt b/notes/cleanup_tasks.txt new file mode 100644 index 00000000..bf418e59 --- /dev/null +++ b/notes/cleanup_tasks.txt @@ -0,0 +1,18 @@ + +Cambridge Chemical Database (NCI) + + doi_prefix:10.3406 release_type:article + + 193,346+ entities + + should be 'dataset' not 'article' + + datacite importer + +Frontiers + + Frontiers non-PDF abstracts, which have DOIs like `10.3389/conf.*`. Should + crawl these, but `release_type` should be... `abstract`? There are at least + 18,743 of these. Should be fixed in crossref-bot first, then by a retroactive + cleanup. + -- cgit v1.2.3