diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-03 16:44:42 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-03 16:52:44 -0700 |
commit | efcc947482e9f281bb9c3f3484c29d2c8deffe09 (patch) | |
tree | 67a36d7c4a70778ccabe3bc787dd2b8ca1f28b8e | |
parent | 0af742bc24f39193a04c913b8abd7258fe10708a (diff) | |
download | fatcat-efcc947482e9f281bb9c3f3484c29d2c8deffe09.tar.gz fatcat-efcc947482e9f281bb9c3f3484c29d2c8deffe09.zip |
cleanups: create a separate JsonLinePusher for cleanup workers (distinct base class)
-rwxr-xr-x | python/fatcat_cleanup.py | 3 | ||||
-rw-r--r-- | python/fatcat_tools/cleanups/__init__.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/cleanups/common.py | 19 |
3 files changed, 20 insertions, 4 deletions
diff --git a/python/fatcat_cleanup.py b/python/fatcat_cleanup.py index 8bcc2ea9..b8d0ee4d 100755 --- a/python/fatcat_cleanup.py +++ b/python/fatcat_cleanup.py @@ -7,8 +7,7 @@ import sys import raven from fatcat_tools import authenticated_api -from fatcat_tools.cleanups import FileCleaner -from fatcat_tools.importers import JsonLinePusher +from fatcat_tools.cleanups import FileCleaner, JsonLinePusher # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable sentry_client = raven.Client() diff --git a/python/fatcat_tools/cleanups/__init__.py b/python/fatcat_tools/cleanups/__init__.py index 0aeec977..d9438f06 100644 --- a/python/fatcat_tools/cleanups/__init__.py +++ b/python/fatcat_tools/cleanups/__init__.py @@ -1,2 +1,2 @@ -from .common import EntityCleaner +from .common import EntityCleaner, JsonLinePusher from .files import FileCleaner diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py index 4e8e49fd..c8ca5800 100644 --- a/python/fatcat_tools/cleanups/common.py +++ b/python/fatcat_tools/cleanups/common.py @@ -1,8 +1,9 @@ import copy import json import subprocess +import sys from collections import Counter -from typing import Any, Dict, List +from typing import Any, Dict, List, Sequence from fatcat_openapi_client import ApiClient, Editgroup @@ -143,3 +144,19 @@ class EntityCleaner: assert self._editgroup_id return self._editgroup_id + + +class JsonLinePusher: + def __init__(self, cleaner: EntityCleaner, json_file: Sequence, **kwargs) -> None: + self.cleaner = cleaner + self.json_file = json_file + + def run(self) -> Counter: + for line in self.json_file: + if not line: + continue + record = json.loads(line) + self.cleaner.push_record(record) + counts = self.cleaner.finish() + print(counts, file=sys.stderr) + return counts |