about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-04-09 18:32:59 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-04-09 18:32:59 -0700
commit f8ad6b272900518f48b69ea03f2c0906aae56fb5 (patch)
tree ef2ae13648675e6ff5b17a6cd4fa23ca06406db5
parent ec6789c863ed0dd25111b362f225195975147723 (diff)
download fatcat-covid19-f8ad6b272900518f48b69ea03f2c0906aae56fb5.tar.gz
fatcat-covid19-f8ad6b272900518f48b69ea03f2c0906aae56fb5.zip
add bs4 and lxml for abstract HTML stripping
-rw-r--r-- Pipfile 2
-rw-r--r-- Pipfile.lock 75
2 files changed, 67 insertions, 10 deletions
diff --git a/Pipfile b/Pipfile
index 5376c5f..6b69a24 100644
--- a/Pipfile
+++ b/Pipfile
@@ -12,6 +12,8 @@ Flask-Babel = "*"
elasticsearch-dsl = ">=6.0.0,<7.0.0"
elasticsearch = ">=6.0.0,<7.0.0"
sentry-sdk = { version = ">=0.14.0", extras = ["flask"] }
+bs4 = "*"
+lxml = "*"
[dev-packages]
pytest = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
index 51e8a40..3ab8b55 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
- "sha256": "6c1b3127ac2f175cac14e1b08425145b31107e2421fe9e2682a0673827e22737"
+ "sha256": "5b12c4080e544f40dbd55d4115d1d915d2a70bd5a3eb8548051946aba367f5f6"
},
"pipfile-spec": 6,
"requires": {
@@ -23,18 +23,33 @@
],
"version": "==2.8.0"
},
+ "beautifulsoup4": {
+ "hashes": [
+ "sha256:594ca51a10d2b3443cbac41214e12dbb2a1cd57e1a7344659849e2e20ba6a8d8",
+ "sha256:a4bbe77fd30670455c5296242967a123ec28c37e9702a8a81bd2f20a4baf0368",
+ "sha256:d4e96ac9b0c3a6d3f0caae2e4124e6055c5dcafde8e2f831ff194c104f0775a0"
+ ],
+ "version": "==4.9.0"
+ },
"blinker": {
"hashes": [
"sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6"
],
"version": "==1.4"
},
+ "bs4": {
+ "hashes": [
+ "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
+ ],
+ "index": "pypi",
+ "version": "==0.0.1"
+ },
"certifi": {
"hashes": [
- "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3",
- "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f"
+ "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304",
+ "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519"
],
- "version": "==2019.11.28"
+ "version": "==2020.4.5.1"
},
"chardet": {
"hashes": [
@@ -68,11 +83,11 @@
},
"flask": {
"hashes": [
- "sha256:13f9f196f330c7c2c5d7a5cf91af894110ca0215ac051b5844701f2bfd934d52",
- "sha256:45eb5a6fd193d6cf7e0cf5d8a5b31f83d5faae0293695626f539a823e93b13f6"
+ "sha256:4efa1ae2d7c9865af48986de8aeb8504bf32c7f3d6fdc9353d34b21f4b127060",
+ "sha256:8a4fdd8936eba2512e9c85df320a37e694c93945b33ef33c89946a340a238557"
],
"index": "pypi",
- "version": "==1.1.1"
+ "version": "==1.1.2"
},
"flask-babel": {
"hashes": [
@@ -103,6 +118,39 @@
],
"version": "==2.11.1"
},
+ "lxml": {
+ "hashes": [
+ "sha256:06d4e0bbb1d62e38ae6118406d7cdb4693a3fa34ee3762238bcb96c9e36a93cd",
+ "sha256:0701f7965903a1c3f6f09328c1278ac0eee8f56f244e66af79cb224b7ef3801c",
+ "sha256:1f2c4ec372bf1c4a2c7e4bb20845e8bcf8050365189d86806bad1e3ae473d081",
+ "sha256:4235bc124fdcf611d02047d7034164897ade13046bda967768836629bc62784f",
+ "sha256:5828c7f3e615f3975d48f40d4fe66e8a7b25f16b5e5705ffe1d22e43fb1f6261",
+ "sha256:585c0869f75577ac7a8ff38d08f7aac9033da2c41c11352ebf86a04652758b7a",
+ "sha256:5d467ce9c5d35b3bcc7172c06320dddb275fea6ac2037f72f0a4d7472035cea9",
+ "sha256:63dbc21efd7e822c11d5ddbedbbb08cd11a41e0032e382a0fd59b0b08e405a3a",
+ "sha256:7bc1b221e7867f2e7ff1933165c0cec7153dce93d0cdba6554b42a8beb687bdb",
+ "sha256:8620ce80f50d023d414183bf90cc2576c2837b88e00bea3f33ad2630133bbb60",
+ "sha256:8a0ebda56ebca1a83eb2d1ac266649b80af8dd4b4a3502b2c1e09ac2f88fe128",
+ "sha256:90ed0e36455a81b25b7034038e40880189169c308a3df360861ad74da7b68c1a",
+ "sha256:95e67224815ef86924fbc2b71a9dbd1f7262384bca4bc4793645794ac4200717",
+ "sha256:afdb34b715daf814d1abea0317b6d672476b498472f1e5aacbadc34ebbc26e89",
+ "sha256:b4b2c63cc7963aedd08a5f5a454c9f67251b1ac9e22fd9d72836206c42dc2a72",
+ "sha256:d068f55bda3c2c3fcaec24bd083d9e2eede32c583faf084d6e4b9daaea77dde8",
+ "sha256:d5b3c4b7edd2e770375a01139be11307f04341ec709cf724e0f26ebb1eef12c3",
+ "sha256:deadf4df349d1dcd7b2853a2c8796593cc346600726eff680ed8ed11812382a7",
+ "sha256:df533af6f88080419c5a604d0d63b2c33b1c0c4409aba7d0cb6de305147ea8c8",
+ "sha256:e4aa948eb15018a657702fee0b9db47e908491c64d36b4a90f59a64741516e77",
+ "sha256:e5d842c73e4ef6ed8c1bd77806bf84a7cb535f9c0cf9b2c74d02ebda310070e1",
+ "sha256:ebec08091a22c2be870890913bdadd86fcd8e9f0f22bcb398abd3af914690c15",
+ "sha256:edc15fcfd77395e24543be48871c251f38132bb834d9fdfdad756adb6ea37679",
+ "sha256:f2b74784ed7e0bc2d02bd53e48ad6ba523c9b36c194260b7a5045071abbb1012",
+ "sha256:fa071559f14bd1e92077b1b5f6c22cf09756c6de7139370249eb372854ce51e6",
+ "sha256:fd52e796fee7171c4361d441796b64df1acfceb51f29e545e812f16d023c4bbc",
+ "sha256:fe976a0f1ef09b3638778024ab9fb8cde3118f203364212c198f71341c0715ca"
+ ],
+ "index": "pypi",
+ "version": "==4.5.0"
+ },
"markupsafe": {
"hashes": [
"sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473",
@@ -194,6 +242,13 @@
],
"version": "==1.14.0"
},
+ "soupsieve": {
+ "hashes": [
+ "sha256:e914534802d7ffd233242b785229d5ba0766a7f487385e3f714446a07bf540ae",
+ "sha256:fcd71e08c0aee99aca1b73f45478549ee7e7fc006d51b37bec9e9def7dc22b69"
+ ],
+ "version": "==2.0"
+ },
"urllib3": {
"hashes": [
"sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc",
@@ -255,10 +310,10 @@
},
"pyparsing": {
"hashes": [
- "sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f",
- "sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec"
+ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
+ "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
],
- "version": "==2.4.6"
+ "version": "==2.4.7"
},
"pytest": {
"hashes": [