From f3a721a9dce8801b78f7bc31e88dc912b0ec1dba Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 23 Dec 2022 15:52:02 -0800
Subject: move a bunch of top-level files/directories to ./extra/

---
 extra/nginx/README.md         | 18 ++++++++++
 extra/nginx/fatcat-blobs      | 51 +++++++++++++++++++++++++++
 extra/nginx/sandcrawler-db    | 80 +++++++++++++++++++++++++++++++++++++++++++
 extra/nginx/sandcrawler-minio | 57 ++++++++++++++++++++++++++++++
 4 files changed, 206 insertions(+)
 create mode 100644 extra/nginx/README.md
 create mode 100644 extra/nginx/fatcat-blobs
 create mode 100644 extra/nginx/sandcrawler-db
 create mode 100644 extra/nginx/sandcrawler-minio

(limited to 'extra/nginx')

diff --git a/extra/nginx/README.md b/extra/nginx/README.md
new file mode 100644
index 0000000..0369f9b
--- /dev/null
+++ b/extra/nginx/README.md
@@ -0,0 +1,18 @@
+
+This folder contains nginx configs for partner access to sandcrawler DB
+(postgrest) and GROBID XML blobs (minio).
+
+`fatcat-blobs` is part of the fatcat.wiki ansible config, but included here to
+show how it works.
+
+## Let's Encrypt
+
+Run as... bnewbold?
+
+    sudo certbot certonly \
+        --non-interactive \
+        --agree-tos \
+        --email bnewbold@archive.org \
+        --webroot -w /var/www/letsencrypt \
+        -d sandcrawler-minio.fatcat.wiki \
+        -d sandcrawler-db.fatcat.wiki
diff --git a/extra/nginx/fatcat-blobs b/extra/nginx/fatcat-blobs
new file mode 100644
index 0000000..5c692ef
--- /dev/null
+++ b/extra/nginx/fatcat-blobs
@@ -0,0 +1,51 @@
+# blobs.fatcat.wiki: forces HTTPS, proxies GET requests under /unpaywall/ to port 9000 (presumably the sandcrawler-minio host), answers other paths with a text hint
+server {
+    listen 80;
+    listen [::]:80;
+    listen 443 ssl http2;
+    listen [::]:443 ssl http2;
+    server_name blobs.fatcat.wiki;
+
+    ssl_certificate /etc/letsencrypt/live/fatcat.wiki/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/fatcat.wiki/privkey.pem;
+
+    #add_header Content-Security-Policy "default-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'";
+    add_header X-Frame-Options "SAMEORIGIN";      # add 'always' on nginx >= 1.7.5 to also send it on error responses
+    add_header X-Content-Type-Options "nosniff";  # add 'always' on nginx >= 1.7.5 to also send it on error responses
+    add_header X-Xss-Protection "1";
+    # Enable STS with one year period (breaks http; optional)
+    #add_header Strict-Transport-Security "max-age=31557600; includeSubDomains";
+
+    error_log /var/log/nginx/fatcat-errors.log;
+    access_log off;
+
+    if ($scheme = http) {
+        return 301 https://$server_name$request_uri;
+    }
+
+    location /unpaywall/ {
+        # GET only; compared exactly because a regex test like !~ "GET" also lets e.g. "FORGET" through
+        if ($request_method != GET) {
+            return 403;
+        }
+
+        #proxy_pass http://sandcrawler-minio.fatcat.wiki:9000$uri$is_args$args;
+        proxy_pass http://207.241.227.141:9000$uri$is_args$args;
+        proxy_redirect off;
+
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header Host $http_host;
+    }
+
+    location / {
+        default_type text/plain;
+        return 504 'blobs.fatcat.wiki hosts many files; full URLs are required!\nyou probably want https://fatcat.wiki/ instead';
+    }
+
+    # Let's Encrypt SSL Certs
+    location /.well-known/acme-challenge/ {
+        root /var/www/letsencrypt;
+        autoindex off;
+    }
+}
diff --git a/extra/nginx/sandcrawler-db b/extra/nginx/sandcrawler-db
new file mode 100644
index 0000000..67d1a2d
--- /dev/null
+++ b/extra/nginx/sandcrawler-db
@@ -0,0 +1,80 @@
+# sandcrawler-db: forces HTTPS and exposes the postgrest upstream (localhost:3030) to GET requests only
+upstream postgrest {
+    server localhost:3030;
+    keepalive 64;
+}
+
+server {
+    listen 80;
+    listen [::]:80;
+    listen 443 ssl http2;
+    listen [::]:443 ssl http2;
+    server_name sandcrawler-db.fatcat.wiki db.sandcrawler.org;
+    # NOTE(review): cert path is the sandcrawler-minio one; presumably the shared cert issued with both -d names, confirm
+    ssl_certificate /etc/letsencrypt/live/sandcrawler-minio.fatcat.wiki/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/sandcrawler-minio.fatcat.wiki/privkey.pem;
+
+    #add_header Content-Security-Policy "default-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'";
+    add_header X-Frame-Options "SAMEORIGIN";      # add 'always' on nginx >= 1.7.5 to also send it on error responses
+    add_header X-Content-Type-Options "nosniff";  # add 'always' on nginx >= 1.7.5 to also send it on error responses
+    add_header X-Xss-Protection "1";
+    # Enable STS with one year period (breaks http; optional)
+    #add_header Strict-Transport-Security "max-age=31557600; includeSubDomains";
+
+    error_log /var/log/nginx/sandcrawler-errors.log;
+    access_log off;
+
+    if ($scheme = http) {
+        return 301 https://$server_name$request_uri;
+    }
+
+    location / {
+
+        default_type application/json;
+
+        # GET only; compared exactly because a regex test like !~ "GET" also lets e.g. "FORGET" through
+        if ($request_method != GET) {
+            return 403;
+        }
+
+        proxy_redirect off;
+        # HTTP/1.1 plus an empty Connection header are what let nginx reuse the upstream's keepalive connections
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header Host $http_host;
+
+        proxy_pass http://postgrest/;
+    }
+
+    # support /endpoint/:id url style for sha1hex lookups
+    location ~ "^/(file_meta|grobid|fatcat_file)/([a-f0-9]{40})$" {
+
+        # GET only; compared exactly because a regex test like !~ "GET" also lets e.g. "FORGET" through
+        if ($request_method != GET) {
+            return 403;
+        }
+
+        # assuming an upstream named "postgrest"
+        # doing this rewrite as part of the proxy_pass line itself didn't seem
+        # to work, so doing a formal rewrite here
+        rewrite "/([a-z_]+)/([a-f0-9]{40})" /$1?sha1hex=eq.$2 break;
+        proxy_pass http://postgrest;
+
+        # make the response singular
+        #default_type application/vnd.pgrst.object+json;
+        proxy_set_header Accept "application/vnd.pgrst.object+json";
+
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+
+    # Let's Encrypt SSL Certs
+    location /.well-known/acme-challenge/ {
+        root /var/www/letsencrypt;
+        autoindex off;
+    }
+}
diff --git a/extra/nginx/sandcrawler-minio b/extra/nginx/sandcrawler-minio
new file mode 100644
index 0000000..2e9bfe3
--- /dev/null
+++ b/extra/nginx/sandcrawler-minio
@@ -0,0 +1,57 @@
+# sandcrawler-minio: forces HTTPS and proxies to localhost:9000; /minio/ takes any method, every other path is GET only
+server {
+    listen 80;
+    listen [::]:80;
+    listen 443 ssl http2;
+    listen [::]:443 ssl http2;
+    server_name sandcrawler-minio.fatcat.wiki minio.sandcrawler.org;
+
+    ssl_certificate /etc/letsencrypt/live/sandcrawler-minio.fatcat.wiki/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/sandcrawler-minio.fatcat.wiki/privkey.pem;
+
+    #add_header Content-Security-Policy "default-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'";
+    add_header X-Frame-Options "SAMEORIGIN";      # add 'always' on nginx >= 1.7.5 to also send it on error responses
+    add_header X-Content-Type-Options "nosniff";  # add 'always' on nginx >= 1.7.5 to also send it on error responses
+    add_header X-Xss-Protection "1";
+    # Enable STS with one year period (breaks http; optional)
+    #add_header Strict-Transport-Security "max-age=31557600; includeSubDomains";
+
+    error_log /var/log/nginx/sandcrawler-errors.log;
+    access_log off;
+
+    if ($scheme = http) {
+        return 301 https://$server_name$request_uri;
+    }
+
+    location /minio/ {
+
+        # allows all HTTP verbs
+
+        proxy_pass http://localhost:9000;
+        proxy_redirect off;
+
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header Host $http_host;
+    }
+
+    location / {
+        # GET only; compared exactly because a regex test like !~ "GET" also lets e.g. "FORGET" through
+        if ($request_method != GET) {
+            return 403;
+        }
+
+        proxy_pass http://localhost:9000;
+        proxy_redirect off;
+
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header Host $http_host;
+    }
+
+    # Let's Encrypt SSL Certs
+    location /.well-known/acme-challenge/ {
+        root /var/www/letsencrypt;
+        autoindex off;
+    }
+}
--
cgit v1.2.3