about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--  python/notes/version_4.md  15
-rw-r--r--  python/refcat/tasks.py  22
2 files changed, 37 insertions, 0 deletions
diff --git a/python/notes/version_4.md b/python/notes/version_4.md
index 7b45083..a78f9e1 100644
--- a/python/notes/version_4.md
+++ b/python/notes/version_4.md
@@ -73,3 +73,18 @@ Example:
* https://openlibrary.org/works/OL488811W/The_debt?edition=debtwhatamerica000robi
+```
+{
+ "biblio": {
+ "container_name": "BLACK AFRICA: The Economic and Cultural Basis for a Federated State",
+ "unstructured": "For details on African Renaissance see Cheikh Anta Diop, BLACK AFRICA: The Economic and Cultural Basis for a Federated State, New Expanded Edition. Trenton, NJ: Africa World Press, 1987.",
+ "year": 1987
+ },
+ "index": 28,
+ "key": "8_CR29",
+ "ref_source": "crossref",
+ "release_year": 2009,
+ "release_ident": "2igycuiobvhxrcmmrzz6anufuq",
+ "work_ident": "aaacj23jqbdxvajwj5kc6jpejq"
+}
+```
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index a82c535..faf76fd 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -685,3 +685,25 @@ class BrefZipFuzzy(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
+
+class OpenLibraryWorks(Refcat):
+ """
+ Extract just the works.
+ """
+ def requires(self):
+ return OpenLibraryDump()
+
+ def run(self):
+ output = shellout("""
+ zstdcat -T0 {input} |
+ parallel -j {n} --block 10M --pipe "jq -rc 'select(.type == \\"work\\")'" |
+ zstd -T0 -c > {output}
+ """,
+ n=self.n,
+ tmpdir=self.tmpdir,
+ input=self.input().path)
+ luigi.LocalTarget(output).move(self.output().path)
+
+ def output(self):
+ return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)