diff options
Diffstat (limited to 'extra/bulk_edits/2022-04-20_isiarticles.md')
-rw-r--r-- | extra/bulk_edits/2022-04-20_isiarticles.md | 26 |
1 file changed, 26 insertions, 0 deletions
diff --git a/extra/bulk_edits/2022-04-20_isiarticles.md b/extra/bulk_edits/2022-04-20_isiarticles.md
new file mode 100644
index 00000000..ca2cc6f9
--- /dev/null
+++ b/extra/bulk_edits/2022-04-20_isiarticles.md
@@ -0,0 +1,26 @@
+
+See metadata cleanups for context. Basically, there are a couple tens of thousands of sample/spam articles hosted on the domain isiarticles.com.
+
+## Prod Updates
+
+Start small:
+
+    export FATCAT_API_HOST=https://api.fatcat.wiki
+    export FATCAT_AUTH_WORKER_CLEANUP=[...]
+    export FATCAT_API_AUTH_TOKEN=$FATCAT_AUTH_WORKER_CLEANUP
+
+    fatcat-cli search file domain:isiarticles.com --entity-json -n0 \
+        | rg -v '"content_scope"' \
+        | rg 'isiarticles.com/' \
+        | head -n50 \
+        | pv -l \
+        | fatcat-cli batch update file release_ids= content_scope=sample --description 'Un-link and mark isiarticles PDFs as content_scope=sample' --auto-accept
+    # editgroup_ihx75kzsebgzfisgjrv67zew5e
+
+The full batch:
+
+    fatcat-cli search file domain:isiarticles.com --entity-json -n0 \
+        | rg -v '"content_scope"' \
+        | rg 'isiarticles.com/' \
+        | pv -l \
+        | fatcat-cli batch update file release_ids= content_scope=sample --description 'Un-link and mark isiarticles PDFs as content_scope=sample' --auto-accept