From f729cf60399592e5e43233c1af65e26f968f5103 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Thu, 29 Jul 2021 18:53:43 +0200
Subject: tasks: reuse tag for filename date

---
 python/refcat/tasks.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'python')

diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 78f1037..cc0cf9a 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-
 """
 Set of luigi tasks to derive a citation graph.
 
@@ -185,16 +184,27 @@ import luigi
 from refcat.base import BaseTask, Zstd, shellout
 from refcat.settings import settings
 
+# Directory structure will be like `base/tag/task/file.ext`, and we use an
+# isodate (e.g. 2021-01-01) as a convention for tag. That way we can keep old
+# pipeline results around, if needed.
+#
+# We also carry the date as a parameter in all tasks (we should probably get
+# rid of it, it is not needed). In order to match the dates, we use the
+# following date_from_tag parsing with fallback.
+try:
+    date_from_tag = datetime.datetime.strptime(settings.TAG, "%Y-%m-%d").date()
+except ValueError:
+    date_from_tag = datetime.date.today()
+
 
 class Refcat(BaseTask):
     """
     A base tasks for all refcat related tasks.
     """
     BASE = settings.BASE
-    TAG = settings.TAG # e.g. "2021-07-28", but can be anything; TODO: converge on a pattern
+    TAG = settings.TAG # e.g. "2021-07-28", but can be anything; TODO: converge on a pattern or simplify!
 
-    date = luigi.DateParameter(default=datetime.date(2021, 7, 28),
-                               description="a versioning help, will be part of filename, change this manually")
+    date = luigi.DateParameter(default=date_from_tag, description="a versioning help, will be part of filename")
     tmpdir = luigi.Parameter(default=settings.TMPDIR, description="set tempdir", significant=False)
     n = luigi.IntParameter(default=multiprocessing.cpu_count(), significant=False)
 
-- 
cgit v1.2.3