diff options
-rw-r--r-- | python/refcat/tasks.py | 18 |
1 files changed, 14 insertions, 4 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index 78f1037..cc0cf9a 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 - """ Set of luigi tasks to derive a citation graph. @@ -185,16 +184,27 @@ import luigi from refcat.base import BaseTask, Zstd, shellout from refcat.settings import settings +# Directory structure will be like `base/tag/task/file.ext`, and we use an +# isodate (e.g. 2021-01-01) as a convention for tag. That way we can keep old +# pipeline results around, if needed. +# +# We also carry the date as a parameter in all tasks (we should probably get +# rid of it, it is not needed). In order to match the dates, we use the +# following date_from_tag parsing with fallback. +try: + date_from_tag = datetime.datetime.strptime(settings.TAG, "%Y-%m-%d").date() +except ValueError: + date_from_tag = datetime.date.today() + class Refcat(BaseTask): """ A base tasks for all refcat related tasks. """ BASE = settings.BASE - TAG = settings.TAG # e.g. "2021-07-28", but can be anything; TODO: converge on a pattern + TAG = settings.TAG # e.g. "2021-07-28", but can be anything; TODO: converge on a pattern or simplify! - date = luigi.DateParameter(default=datetime.date(2021, 7, 28), - description="a versioning help, will be part of filename, change this manually") + date = luigi.DateParameter(default=date_from_tag, description="a versioning help, will be part of filename") tmpdir = luigi.Parameter(default=settings.TMPDIR, description="set tempdir", significant=False) n = luigi.IntParameter(default=multiprocessing.cpu_count(), significant=False) |