diff options
| author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-21 23:57:17 +0200 | 
|---|---|---|
| committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-21 23:57:17 +0200 | 
| commit | 92c2eb36e91a6bf3d6114315fb91494959a49c62 (patch) | |
| tree | dc9e0a94ccdb381e31fc92352b05d909d7e388f0 /python | |
| parent | 430ae0f7cc3cd15435a5b7a509e7fd4bbd3016e0 (diff) | |
| download | refcat-92c2eb36e91a6bf3d6114315fb91494959a49c62.tar.gz refcat-92c2eb36e91a6bf3d6114315fb91494959a49c62.zip | |
add OpenLibraryWorks
Diffstat (limited to 'python')
| -rw-r--r-- | python/notes/version_4.md | 15 | ||||
| -rw-r--r-- | python/refcat/tasks.py | 22 | 
2 files changed, 37 insertions, 0 deletions
````diff
diff --git a/python/notes/version_4.md b/python/notes/version_4.md
index 7b45083..a78f9e1 100644
--- a/python/notes/version_4.md
+++ b/python/notes/version_4.md
@@ -73,3 +73,18 @@ Example:
 
 * https://openlibrary.org/works/OL488811W/The_debt?edition=debtwhatamerica000robi
 
+```
+{
+  "biblio": {
+    "container_name": "BLACK AFRICA: The Economic and Cultural Basis for a Federated State",
+    "unstructured": "For details on African Renaissance see Cheikh Anta Diop, BLACK AFRICA: The Economic and Cultural Basis for a Federated State, New Expanded Edition. Trenton, NJ: Africa World Press, 1987.",
+    "year": 1987
+  },
+  "index": 28,
+  "key": "8_CR29",
+  "ref_source": "crossref",
+  "release_year": 2009,
+  "release_ident": "2igycuiobvhxrcmmrzz6anufuq",
+  "work_ident": "aaacj23jqbdxvajwj5kc6jpejq"
+}
+```
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index a82c535..faf76fd 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -685,3 +685,25 @@ class BrefZipFuzzy(Refcat):
 
     def output(self):
         return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
+
+class OpenLibraryWorks(Refcat):
+    """
+    Extract just the works.
+    """
+    def requires(self):
+        return OpenLibraryDump()
+
+    def run(self):
+        output = shellout("""
+                          zstdcat -T0 {input} |
+                          parallel -j {n} --block 10M --pipe "jq -rc 'select(.type == \\"work\\")'" |
+                          zstd -T0 -c > {output}
+                          """,
+                          n=self.n,
+                          tmpdir=self.tmpdir,
+                          input=self.input().path)
+        luigi.LocalTarget(output).move(self.output().path)
+
+    def output(self):
+        return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
````
