aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/refcat/tasks.py18
1 files changed, 14 insertions, 4 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 78f1037..cc0cf9a 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
-
"""
Set of luigi tasks to derive a citation graph.
@@ -185,16 +184,27 @@ import luigi
from refcat.base import BaseTask, Zstd, shellout
from refcat.settings import settings
+# Directory structure will be like `base/tag/task/file.ext`, and we use an
+# isodate (e.g. 2021-01-01) as a convention for tag. That way we can keep old
+# pipeline results around, if needed.
+#
+# We also carry the date as a parameter in all tasks (we should probably get
+# rid of it; it is not needed). To keep both in sync, we parse the date from
+# the tag and fall back to today's date if the tag is not an isodate.
+try:
+ date_from_tag = datetime.datetime.strptime(settings.TAG, "%Y-%m-%d").date()
+except ValueError:
+ date_from_tag = datetime.date.today()
+
class Refcat(BaseTask):
"""
A base tasks for all refcat related tasks.
"""
BASE = settings.BASE
- TAG = settings.TAG # e.g. "2021-07-28", but can be anything; TODO: converge on a pattern
+ TAG = settings.TAG # e.g. "2021-07-28", but can be anything; TODO: converge on a pattern or simplify!
- date = luigi.DateParameter(default=datetime.date(2021, 7, 28),
- description="a versioning help, will be part of filename, change this manually")
+ date = luigi.DateParameter(default=date_from_tag, description="a versioning help, will be part of filename")
tmpdir = luigi.Parameter(default=settings.TMPDIR, description="set tempdir", significant=False)
n = luigi.IntParameter(default=multiprocessing.cpu_count(), significant=False)