From c81d580e5aaecf68116928702a1bf56b25d669fc Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Wed, 19 May 2021 21:24:11 +0200
Subject: tasks: update docs

---
 python/refcat/tasks.py | 64 +++++++++++++++++++-------------------------------
 1 file changed, 24 insertions(+), 40 deletions(-)

diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index d3c658d..57f961b 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -1,26 +1,10 @@
 #!/usr/bin/env python3
 """
-Set of luigi tasks to extract and build various derivations from fatcat
-reference data. Reference data can come from metadata or ML extraction
-(grobid).
-
-Rationale is that we successively want to build a derived dataset (citation
-graph and various reports) and we expect the data to be messy and partial. We
-want to cache intermediate results for a uniform structure of outputs.
-
-Notes.
-
-We use a single zstd-compressed input file for easier handling (albeit splits
-would be run in parallel by luigi). A sha1sum takes 67 minutes.
-
-Outputs should be compressed as well to save space.
-
-Use TMPDIR=... to adjust temporary directory.
-
-To list available tasks, there is a convenience "run.sh" in the repo:
+Set of luigi tasks to derive a citation graph. To list available tasks, run:
 
     $ refcat.pyz
 
+
                   ____           __
        ________  / __/________ _/ /_
       / ___/ _ \/ /_/ ___/ __ `/ __/
@@ -29,35 +13,35 @@ To list available tasks, there is a convenience "run.sh" in the repo:
 
     Command line entry point for running various data tasks.
 
-        $ refcat [COMMAND | TASK] [OPTIONS]
+        $ refcat.pyz [COMMAND | TASK] [OPTIONS]
+
+    Commands: ls, ll, deps, tasks, files, config, cat, completion
 
-    Commands: ls, ll, deps
+    To enable shell completion in the current session, run:
+
+        $ source <(refcat.pyz completion)
 
     VERSION   0.1.0
     SETTINGS  /home/tir/.config/refcat/settings.ini
-    BASE      /magna/.cache
-    TMPDIR    /magna/tmp
-
-    BiblioRef                  RGSitemapToRelease         RefsKeyStats
-    BiblioRefFromJoin          Refcat                     RefsPMCID
-    BiblioRefFuzzy             Refs                       RefsPMID
-    CommonDOI                  RefsArxiv                  RefsReleasesMerged
-    CommonTitles               RefsCounter                RefsSortedKeys
-    CommonTitlesLower          RefsDOI                    RefsTitleFrequency
-    FatcatArxiv                RefsFatcatArxivJoin        RefsTitles
-    FatcatDOI                  RefsFatcatClusterVerify    RefsTitlesLower
-    FatcatPMCID                RefsFatcatClusters         RefsToRelease
-    FatcatPMID                 RefsFatcatDOIJoin          ReleaseExportExpanded
-    FatcatSortedKeys           RefsFatcatGroupJoin        ReleaseExportReduced
-    FatcatTitles               RefsFatcatPMCIDJoin        ReleaseExportTitleOnly
-    FatcatTitlesLower          RefsFatcatPMIDJoin         URLList
-    KeyDistribution            RefsFatcatRanked           URLTabs
-    RGSitemapFatcatMerged      RefsFatcatSortedKeys
-    RGSitemapFatcatSortedKeys  RefsFatcatTitleLowerJoin
+    BASE      /bigger/.cache
+    TMPDIR    /bigger/tmp
+
+    BrefZipDOI                        RefsDOI
+    FatcatArxiv                       RefsPMCID
+    FatcatDOI                         RefsPMID
+    FatcatMapped                      RefsToRelease
+    FatcatPMCID                       RefsWithUnstructured
+    FatcatPMID                        ReleaseExportExpanded
+    MAGPapers                         ReleaseExportReduced
+    OpenLibraryDump                   URLList
+    Refcat                            URLTabs
+    Refs                              UnmatchedRefs
+    RefsArxiv                         WikipediaCitationsMinimalDataset
+
 
 Config (e.g. raw input data) taken from $HOME/.config/refcat/settings.ini.
 
---------
+=================
 
 TODO and report notes
 
-- 
cgit v1.2.3