diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/notes/version_4.md | 15 | ||||
-rw-r--r-- | python/refcat/tasks.py | 22 |
2 files changed, 37 insertions, 0 deletions
diff --git a/python/notes/version_4.md b/python/notes/version_4.md index 7b45083..a78f9e1 100644 --- a/python/notes/version_4.md +++ b/python/notes/version_4.md @@ -73,3 +73,18 @@ Example: * https://openlibrary.org/works/OL488811W/The_debt?edition=debtwhatamerica000robi +``` +{ + "biblio": { + "container_name": "BLACK AFRICA: The Economic and Cultural Basis for a Federated State", + "unstructured": "For details on African Renaissance see Cheikh Anta Diop, BLACK AFRICA: The Economic and Cultural Basis for a Federated State, New Expanded Edition. Trenton, NJ: Africa World Press, 1987.", + "year": 1987 + }, + "index": 28, + "key": "8_CR29", + "ref_source": "crossref", + "release_year": 2009, + "release_ident": "2igycuiobvhxrcmmrzz6anufuq", + "work_ident": "aaacj23jqbdxvajwj5kc6jpejq" +} +``` diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index a82c535..faf76fd 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -685,3 +685,25 @@ class BrefZipFuzzy(Refcat): def output(self): return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd) + + +class OpenLibraryWorks(Refcat): + """ + Extract just the works. + """ + def requires(self): + return OpenLibraryDump() + + def run(self): + output = shellout(""" + zstdcat -T0 {input} | + parallel -j {n} --block 10M --pipe "jq -rc 'select(.type == \\"work\\")'" | + zstd -T0 -c > {output} + """, + n=self.n, + tmpdir=self.tmpdir, + input=self.input().path) + luigi.LocalTarget(output).move(self.output().path) + + def output(self): + return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd) |