From 1eae78c37dcb605c369d977f4ad764694603641b Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Sun, 21 Mar 2021 01:39:13 +0100 Subject: add ol and wikipedia notes --- python/notes/openlibrary_works.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 python/notes/openlibrary_works.md (limited to 'python/notes/openlibrary_works.md') diff --git a/python/notes/openlibrary_works.md b/python/notes/openlibrary_works.md new file mode 100644 index 0000000..25df527 --- /dev/null +++ b/python/notes/openlibrary_works.md @@ -0,0 +1,27 @@ + +## Upstream Dumps + +Open Library does monthly bulk dumps: https://openlibrary.org/developers/dumps + +Latest work dump: https://openlibrary.org/data/ol_dump_works_latest.txt.gz + +TSV columns: + + type - type of record (/type/edition, /type/work, etc.) + key - unique key of the record (/books/OL1M, etc.) + revision - revision number of the record + last_modified - last modified timestamp + JSON - the complete record in JSON format + + zcat ol_dump_works_latest.txt.gz | cut -f5 | head | jq . + +We are going to want: + +- title (with "prefix"?) +- authors +- subtitle +- year +- identifier (work? edition?) +- isbn-13 (if available) +- borrowable or not + -- cgit v1.2.3