diff options
Diffstat (limited to 'extra')
| -rw-r--r-- | extra/docker/Dockerfile.test-base | 47 | ||||
| -rw-r--r-- | extra/docker/README.md | 4 | ||||
| -rw-r--r-- | extra/stats/2020-02-21-prod-stats.json | 1 | ||||
| -rw-r--r-- | extra/stats/2020-02-21-prod-tables-sizes.txt | 48 | ||||
| -rw-r--r-- | extra/stats/2020-02-24-prod-dupes.txt | 5 | ||||
| -rw-r--r-- | extra/stats/2020-02-24-prod-table-sizes.txt | 47 | ||||
| -rw-r--r-- | extra/stats/2020-03-03-prod-stats.json | 1 | ||||
| -rw-r--r-- | extra/stats/2020-04-17-prod-stats.json | 1 | ||||
| -rw-r--r-- | extra/stats/2020-04-17-prod-table-sizes.txt | 46 | 
9 files changed, 200 insertions, 0 deletions
| diff --git a/extra/docker/Dockerfile.test-base b/extra/docker/Dockerfile.test-base new file mode 100644 index 00000000..a556ed99 --- /dev/null +++ b/extra/docker/Dockerfile.test-base @@ -0,0 +1,47 @@
+# Test base image: Ubuntu xenial plus Python 3.7, a pinned Rust toolchain, diesel_cli and pipenv.
+FROM ubuntu:xenial
+
+ENV RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    PATH=/usr/local/cargo/bin:$PATH \
+    RUST_VERSION=1.42.0 \
+    LC_ALL=C.UTF-8 \
+    LANG=C.UTF-8
+
+# Add deadsnakes repo (presumably the source of the python3.7 packages below); apt-get is used because apt's CLI is not stable for scripts
+RUN set -eux; \
+    apt-get update -qy; \
+    apt-get install -y software-properties-common; \
+    add-apt-repository -y ppa:deadsnakes/ppa; \
+    rm -rf /var/lib/apt/lists/*
+
+# APT dependencies (sorted, de-duplicated); package lists are removed in the same layer so they are not stored in the image
+RUN set -eux; \
+    apt-get update -qy; \
+    apt-get install -y git libpq-dev libsnappy-dev libsodium-dev pkg-config python3-dev python3-pip python3-pytest python3-requests python3-six python3-wheel python3.7 python3.7-dev python3.7-distutils python3.7-venv wget; \
+    rm -rf /var/lib/apt/lists/*
+
+# Rust setup from docker-rust debian Dockerfile; target is hard-coded to x86_64 and rustup-init is checked against a pinned SHA-256
+RUN set -eux; \
+    rustArch='x86_64-unknown-linux-gnu'; rustupSha256='ad1f8b5199b3b9e231472ed7aa08d2e5d1d539198a15c5b1e53c746aad81d27b'; \
+    url="https://static.rust-lang.org/rustup/archive/1.21.1/${rustArch}/rustup-init"; \
+    wget "$url"; \
+    echo "${rustupSha256} *rustup-init" | sha256sum -c -; \
+    chmod +x rustup-init; \
+    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain "$RUST_VERSION"; \
+    rm rustup-init; \
+    chmod -R a+w "$RUSTUP_HOME" "$CARGO_HOME"; \
+    rustup --version; \
+    cargo --version; \
+    rustc --version
+
+# Compile and install diesel (postgres backend only); the final command fails the build if the binary is not on PATH
+RUN set -eux; \
+    cargo install diesel_cli --version 1.3.1 --no-default-features --features postgres; \
+    diesel --version
+
+# Install pipenv without keeping pip's download cache in the layer
+RUN set -eux; \
+    pip3 install --no-cache-dir pipenv; \
+    pipenv --version
+
diff --git a/extra/docker/README.md b/extra/docker/README.md index 4fd78e56..1869b354 100644 --- a/extra/docker/README.md +++ b/extra/docker/README.md @@ -17,3 +17,7 @@ TODO:  - 
postgres  - fatcatd (rust)  - kibana + +## Test Base Image + +    docker build -t bnewbold/fatcat-test-base -f Dockerfile.test-base . diff --git a/extra/stats/2020-02-21-prod-stats.json b/extra/stats/2020-02-21-prod-stats.json new file mode 100644 index 00000000..3ab6471f --- /dev/null +++ b/extra/stats/2020-02-21-prod-stats.json @@ -0,0 +1 @@ +{"changelog":{"latest":{"index":3528195,"timestamp":"2020-02-22T05:23:18.082262+00:00"}},"container":{"total":148396},"papers":{"in_kbart":60529767,"in_web":20374670,"in_web_not_kbart":9598464,"is_oa":11547112,"total":105732384},"release":{"refs_total":890869519,"total":143867045}} diff --git a/extra/stats/2020-02-21-prod-tables-sizes.txt b/extra/stats/2020-02-21-prod-tables-sizes.txt new file mode 100644 index 00000000..bc756ba7 --- /dev/null +++ b/extra/stats/2020-02-21-prod-tables-sizes.txt @@ -0,0 +1,48 @@ + +Size:  478.37G + +              table_name               | table_size | indexes_size | total_size  +---------------------------------------+------------+--------------+------------ + "public"."release_contrib"            | 53 GB      | 43 GB        | 96 GB + "public"."release_rev"                | 58 GB      | 33 GB        | 91 GB + "public"."refs_blob"                  | 85 GB      | 2884 MB      | 88 GB + "public"."release_edit"               | 14 GB      | 20 GB        | 34 GB + "public"."work_edit"                  | 13 GB      | 20 GB        | 34 GB + "public"."release_ident"              | 9515 MB    | 15 GB        | 24 GB + "public"."work_ident"                 | 9313 MB    | 15 GB        | 24 GB + "public"."abstracts"                  | 16 GB      | 1504 MB      | 18 GB + "public"."file_rev_url"               | 10235 MB   | 3587 MB      | 13 GB + "public"."work_rev"                   | 6046 MB    | 5825 MB      | 12 GB + "public"."release_ref"                | 3997 MB    | 5690 MB      | 9686 MB + "public"."file_rev"                   | 3635 MB    | 5359 MB      | 8994 MB + "public"."file_edit"               
   | 3111 MB    | 4051 MB      | 7162 MB + "public"."release_rev_abstract"       | 2406 MB    | 3342 MB      | 5749 MB + "public"."file_ident"                 | 1848 MB    | 2505 MB      | 4354 MB + "public"."file_rev_release"           | 1698 MB    | 2483 MB      | 4181 MB + "public"."creator_edit"               | 702 MB     | 942 MB       | 1643 MB + "public"."creator_rev"                | 695 MB     | 719 MB       | 1413 MB + "public"."editgroup"                  | 767 MB     | 405 MB       | 1172 MB + "public"."creator_ident"              | 474 MB     | 648 MB       | 1121 MB + "public"."release_rev_extid"          | 200 MB     | 312 MB       | 512 MB + "public"."changelog"                  | 220 MB     | 214 MB       | 434 MB + "public"."container_rev"              | 75 MB      | 23 MB        | 98 MB + "public"."container_edit"             | 25 MB      | 31 MB        | 56 MB + "public"."container_ident"            | 11 MB      | 19 MB        | 30 MB + "public"."webcapture_rev_cdx"         | 64 kB      | 32 kB        | 96 kB + "public"."fileset_rev_file"           | 48 kB      | 32 kB        | 80 kB + "public"."auth_oidc"                  | 16 kB      | 48 kB        | 64 kB + "public"."fileset_edit"               | 16 kB      | 48 kB        | 64 kB + "public"."editor"                     | 16 kB      | 48 kB        | 64 kB + "public"."webcapture_edit"            | 16 kB      | 48 kB        | 64 kB + "public"."editgroup_annotation"       | 16 kB      | 48 kB        | 64 kB + "public"."fileset_rev_url"            | 16 kB      | 32 kB        | 48 kB + "public"."webcapture_rev_url"         | 16 kB      | 32 kB        | 48 kB + "public"."fileset_rev_release"        | 8192 bytes | 32 kB        | 40 kB + "public"."fileset_ident"              | 8192 bytes | 32 kB        | 40 kB + "public"."webcapture_rev_release"     | 8192 bytes | 32 kB        | 40 kB + "public"."webcapture_ident"           | 8192 bytes | 32 kB        | 40 kB + "public"."fileset_rev"                | 
16 kB      | 16 kB        | 32 kB + "public"."webcapture_rev"             | 16 kB      | 16 kB        | 32 kB + "public"."__diesel_schema_migrations" | 8192 bytes | 16 kB        | 24 kB +(41 rows) + diff --git a/extra/stats/2020-02-24-prod-dupes.txt b/extra/stats/2020-02-24-prod-dupes.txt new file mode 100644 index 00000000..7d1d09cf --- /dev/null +++ b/extra/stats/2020-02-24-prod-dupes.txt @@ -0,0 +1,5 @@ +   19409 doi_ident.dupes.tsv +   28530 pmcid_ident.dupes.tsv +  463523 pmid_ident.dupes.tsv +    2025 sha1_ident.dupes.tsv +      10 wikidata_ident.dupes.tsv diff --git a/extra/stats/2020-02-24-prod-table-sizes.txt b/extra/stats/2020-02-24-prod-table-sizes.txt new file mode 100644 index 00000000..359cb2f3 --- /dev/null +++ b/extra/stats/2020-02-24-prod-table-sizes.txt @@ -0,0 +1,47 @@ + +Size:  560.76G + +              table_name               | table_size | indexes_size | total_size  +---------------------------------------+------------+--------------+------------ + "public"."release_contrib"            | 53 GB      | 43 GB        | 96 GB + "public"."release_rev"                | 58 GB      | 33 GB        | 91 GB + "public"."refs_blob"                  | 85 GB      | 2884 MB      | 88 GB + "public"."file_rev"                   | 23 GB      | 26 GB        | 49 GB + "public"."release_edit"               | 14 GB      | 20 GB        | 34 GB + "public"."work_edit"                  | 13 GB      | 20 GB        | 34 GB + "public"."release_ident"              | 9517 MB    | 15 GB        | 24 GB + "public"."work_ident"                 | 9315 MB    | 15 GB        | 24 GB + "public"."file_edit"                  | 9555 MB    | 14 GB        | 24 GB + "public"."abstracts"                  | 16 GB      | 1505 MB      | 18 GB + "public"."file_rev_url"               | 13 GB      | 4730 MB      | 17 GB + "public"."file_ident"                 | 5885 MB    | 9480 MB      | 15 GB + "public"."file_rev_release"           | 5515 MB    | 9536 MB      | 15 GB + "public"."work_rev"        
           | 6047 MB    | 5825 MB      | 12 GB + "public"."release_ref"                | 3997 MB    | 5690 MB      | 9686 MB + "public"."release_rev_abstract"       | 2408 MB    | 3343 MB      | 5751 MB + "public"."creator_edit"               | 702 MB     | 942 MB       | 1643 MB + "public"."creator_rev"                | 695 MB     | 719 MB       | 1413 MB + "public"."editgroup"                  | 903 MB     | 465 MB       | 1368 MB + "public"."creator_ident"              | 474 MB     | 648 MB       | 1121 MB + "public"."release_rev_extid"          | 200 MB     | 312 MB       | 512 MB + "public"."changelog"                  | 261 MB     | 229 MB       | 490 MB + "public"."container_rev"              | 75 MB      | 23 MB        | 98 MB + "public"."container_edit"             | 25 MB      | 31 MB        | 56 MB + "public"."container_ident"            | 11 MB      | 19 MB        | 30 MB + "public"."webcapture_rev_cdx"         | 64 kB      | 32 kB        | 96 kB + "public"."fileset_rev_file"           | 48 kB      | 32 kB        | 80 kB + "public"."auth_oidc"                  | 16 kB      | 48 kB        | 64 kB + "public"."editgroup_annotation"       | 16 kB      | 48 kB        | 64 kB + "public"."fileset_edit"               | 16 kB      | 48 kB        | 64 kB + "public"."webcapture_edit"            | 16 kB      | 48 kB        | 64 kB + "public"."editor"                     | 16 kB      | 48 kB        | 64 kB + "public"."fileset_rev_url"            | 16 kB      | 32 kB        | 48 kB + "public"."webcapture_rev_url"         | 16 kB      | 32 kB        | 48 kB + "public"."fileset_rev_release"        | 8192 bytes | 32 kB        | 40 kB + "public"."fileset_ident"              | 8192 bytes | 32 kB        | 40 kB + "public"."webcapture_rev_release"     | 8192 bytes | 32 kB        | 40 kB + "public"."webcapture_ident"           | 8192 bytes | 32 kB        | 40 kB + "public"."fileset_rev"                | 16 kB      | 16 kB        | 32 kB + "public"."webcapture_rev"            
 | 16 kB      | 16 kB        | 32 kB + "public"."__diesel_schema_migrations" | 8192 bytes | 16 kB        | 24 kB +(41 rows) diff --git a/extra/stats/2020-03-03-prod-stats.json b/extra/stats/2020-03-03-prod-stats.json new file mode 100644 index 00000000..0ac977b8 --- /dev/null +++ b/extra/stats/2020-03-03-prod-stats.json @@ -0,0 +1 @@ +{"changelog":{"latest":{"index":4242658,"timestamp":"2020-03-03T18:35:06.153130+00:00"}},"container":{"total":148428},"papers":{"in_kbart":60594053,"in_web":22232097,"in_web_not_kbart":10756782,"is_oa":15267353,"total":105933568},"release":{"refs_total":893136234,"total":144138471}} diff --git a/extra/stats/2020-04-17-prod-stats.json b/extra/stats/2020-04-17-prod-stats.json new file mode 100644 index 00000000..ddf7fca1 --- /dev/null +++ b/extra/stats/2020-04-17-prod-stats.json @@ -0,0 +1 @@ +{"changelog":{"latest":{"index":4460684,"timestamp":"2020-04-17T18:03:34.373631+00:00"}},"container":{"total":149527},"papers":{"in_kbart":60679890,"in_web":24250766,"in_web_not_kbart":11970984,"is_oa":15538739,"total":108761510},"release":{"refs_total":914708032,"total":148081134}} diff --git a/extra/stats/2020-04-17-prod-table-sizes.txt b/extra/stats/2020-04-17-prod-table-sizes.txt new file mode 100644 index 00000000..79aa3b98 --- /dev/null +++ b/extra/stats/2020-04-17-prod-table-sizes.txt @@ -0,0 +1,46 @@ +Size:  591.60G + +              table_name               | table_size | indexes_size | total_size  +---------------------------------------+------------+--------------+------------ + "public"."release_contrib"            | 55 GB      | 45 GB        | 100 GB + "public"."release_rev"                | 60 GB      | 34 GB        | 94 GB + "public"."refs_blob"                  | 87 GB      | 2885 MB      | 89 GB + "public"."file_rev"                   | 26 GB      | 29 GB        | 55 GB + "public"."release_edit"               | 14 GB      | 21 GB        | 35 GB + "public"."work_edit"                  | 14 GB      | 21 GB        | 34 GB + 
"public"."file_edit"                  | 11 GB      | 16 GB        | 27 GB + "public"."release_ident"              | 9821 MB    | 15 GB        | 24 GB + "public"."work_ident"                 | 9596 MB    | 15 GB        | 24 GB + "public"."file_rev_url"               | 15 GB      | 6040 MB      | 21 GB + "public"."abstracts"                  | 18 GB      | 1688 MB      | 19 GB + "public"."file_ident"                 | 6694 MB    | 10219 MB     | 17 GB + "public"."file_rev_release"           | 6267 MB    | 10109 MB     | 16 GB + "public"."work_rev"                   | 6233 MB    | 5825 MB      | 12 GB + "public"."release_ref"                | 4441 MB    | 6322 MB      | 11 GB + "public"."release_rev_abstract"       | 2637 MB    | 3505 MB      | 6141 MB + "public"."creator_edit"               | 702 MB     | 942 MB       | 1643 MB + "public"."editgroup"                  | 980 MB     | 502 MB       | 1482 MB + "public"."creator_rev"                | 695 MB     | 719 MB       | 1413 MB + "public"."creator_ident"              | 474 MB     | 648 MB       | 1121 MB + "public"."changelog"                  | 289 MB     | 239 MB       | 527 MB + "public"."release_rev_extid"          | 206 MB     | 320 MB       | 526 MB + "public"."container_rev"              | 75 MB      | 23 MB        | 98 MB + "public"."container_edit"             | 25 MB      | 32 MB        | 57 MB + "public"."container_ident"            | 11 MB      | 19 MB        | 30 MB + "public"."webcapture_rev_cdx"         | 64 kB      | 32 kB        | 96 kB + "public"."fileset_rev_file"           | 48 kB      | 32 kB        | 80 kB + "public"."auth_oidc"                  | 16 kB      | 48 kB        | 64 kB + "public"."editgroup_annotation"       | 16 kB      | 48 kB        | 64 kB + "public"."fileset_edit"               | 16 kB      | 48 kB        | 64 kB + "public"."webcapture_edit"            | 16 kB      | 48 kB        | 64 kB + "public"."editor"                     | 16 kB      | 48 kB        | 64 kB + 
"public"."fileset_rev_url"            | 16 kB      | 32 kB        | 48 kB + "public"."webcapture_rev_url"         | 16 kB      | 32 kB        | 48 kB + "public"."fileset_rev_release"        | 8192 bytes | 32 kB        | 40 kB + "public"."fileset_ident"              | 8192 bytes | 32 kB        | 40 kB + "public"."webcapture_rev_release"     | 8192 bytes | 32 kB        | 40 kB + "public"."webcapture_ident"           | 8192 bytes | 32 kB        | 40 kB + "public"."fileset_rev"                | 16 kB      | 16 kB        | 32 kB + "public"."webcapture_rev"             | 16 kB      | 16 kB        | 32 kB + "public"."__diesel_schema_migrations" | 8192 bytes | 16 kB        | 24 kB +(41 rows) | 
