From 752be5426178f4730326c6ca89d1fc20135a2509 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 4 Feb 2022 20:08:25 -0800 Subject: bulk metadata edit log --- extra/bulk_edits/2022-02-04_zenodo_spam.md | 64 ++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 extra/bulk_edits/2022-02-04_zenodo_spam.md (limited to 'extra/bulk_edits/2022-02-04_zenodo_spam.md') diff --git a/extra/bulk_edits/2022-02-04_zenodo_spam.md b/extra/bulk_edits/2022-02-04_zenodo_spam.md new file mode 100644 index 00000000..a95f5180 --- /dev/null +++ b/extra/bulk_edits/2022-02-04_zenodo_spam.md @@ -0,0 +1,64 @@ + +## Cleanup Zenodo Spam + + fatcat-cli search releases 'year:2021 title:"DOWNLOAD MP3:" doi_prefix:10.5281 !journal:* in_ia:false' --count + # 29653 + + fatcat-cli search releases 'year:2021 title:"DOWNLOAD MP3:" title:"album download" doi_prefix:10.5281 !journal:* in_ia:false' --count + # 29043 + +Let's nuke 'em: + + # start small, not automatic + fatcat-cli search releases 'year:2021 title:"DOWNLOAD MP3:" title:"album download" doi_prefix:10.5281 !journal:* in_ia:false !release_type:stub' --entity-json --limit 50 \ + | jq 'select(.release_type != "stub")' -c \ + | fatcat-cli batch update release release_type=stub withdrawn_status=spam --description "Mark Zenodo spam as such" + # editgroup_yizvqtz24bfv3jd6vmawiqnojm + + # ok, scale it up! 
+ fatcat-cli search releases 'year:2021 title:"DOWNLOAD MP3:" title:"album download" doi_prefix:10.5281 !journal:* in_ia:false !release_type:stub' --entity-json --limit 30000 \ + | jq 'select(.release_type != "stub")' -c \ + | pv -l \ + | fatcat-cli batch update release release_type=stub withdrawn_status=spam --description "Mark Zenodo spam as such" --auto-accept + +Another pattern: + + fatcat-cli search releases 'year:2021 title:"download album" title:"zip mp3" author:download doi_prefix:10.5281 !journal:* in_ia:false !release_type:stub' --count + # 14376 + + fatcat-cli search releases 'year:2021 title:"download album" title:"zip mp3" author:download doi_prefix:10.5281 !journal:* in_ia:false !release_type:stub' --entity-json --limit 30000 \ + | jq 'select(.release_type != "stub")' -c \ + | pv -l \ + | fatcat-cli batch update release release_type=stub withdrawn_status=spam --description "Mark Zenodo spam as such" --auto-accept + +Also handled some patterns manually. + +Another pattern; checked manually, and all results look like spam: + + fatcat-cli search releases 'title:"live stream free" doi_prefix:10.5281 !type:stub' --count + # 176 + + fatcat-cli search releases 'title:"live stream free" doi_prefix:10.5281 !type:stub' --entity-json --limit 200 \ + | jq 'select(.release_type != "stub")' -c \ + | pv -l \ + | fatcat-cli batch update release release_type=stub withdrawn_status=spam --description "Mark Zenodo spam as such" --auto-accept + # done + +Another large pattern: + + fatcat-cli search releases 'year:2021 title:"Full Album Download" title:mp3 author:download doi_prefix:10.5281 !journal:* in_ia:false !release_type:stub' --count + # 14800 + + fatcat-cli search releases 'year:2021 title:"Full Album Download" title:mp3 author:download doi_prefix:10.5281 !journal:* in_ia:false !release_type:stub' --entity-json --limit 15000 \ + | jq 'select(.release_type != "stub")' -c \ + | pv -l \ + | fatcat-cli batch update release release_type=stub withdrawn_status=spam --description "Mark Zenodo 
spam as such" --auto-accept + +Small pattern: + + fatcat-cli search releases 'gomovies !release_type:stub' --entity-json --limit 50 \ + | jq 'select(.release_type != "stub")' -c \ + | pv -l \ + | fatcat-cli batch update release release_type=stub withdrawn_status=spam --description "Mark Zenodo spam as such" + # editgroup_fzumcytmljfebldd5az643wqmi + -- cgit v1.2.3