From 103f75a6e6af836f7a4afa5746847ef7ce63591d Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 15 Dec 2020 04:09:26 +0100 Subject: include matching (stub) --- Pipfile | 2 + Pipfile.lock | 501 ++++++++++++++++++++++++++++----------------------- fuzzycat/matching.py | 91 ++++++++++ setup.py | 3 + 4 files changed, 376 insertions(+), 221 deletions(-) create mode 100644 fuzzycat/matching.py diff --git a/Pipfile b/Pipfile index 28fe393..5b190f9 100644 --- a/Pipfile +++ b/Pipfile @@ -24,6 +24,8 @@ nltk = "*" regex = "*" pytest-cov = "*" glom = "*" +elasticsearch = "*" +elasticsearch-dsl = "*" [requires] python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index 923ff3b..6f0fb6e 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "2302ed99d06867bc58dd112e5cc4cff090efda5f8be705ba1124c29771e31c82" + "sha256": "98b3f52b4b9617c7b902cf7288f34a4de6653446638e259de07cc0b639aa10b1" }, "pipfile-spec": 6, "requires": { @@ -26,22 +26,22 @@ }, "blis": { "hashes": [ - "sha256:19557b14763253ca3d4f6cfc9c9fe2eed3d65db14fa273ced8b0c17ce2bfda4a", - "sha256:221eef1b351063b0eb6682c7af2916884ce6c5015d95f6a28b7e45bea35e493c", - "sha256:3917491f366467a7339997dcfe501a282409cc50f461c6e1491d342a424442f1", - "sha256:55bacf2a1eb72ae728b665ac46433bcfee670de84f9bdf99987adf836fd92309", - "sha256:59cc549e904fd4169105f92d6fac94495eca228c357232f29444ae79981af974", - "sha256:6e65ef84205fadc1c41e59bd9d0d67fca8c41d463761b2dfe447003840172062", - "sha256:7f3e9564023e59a0abe9642a94408b3d2fe05ff205d800d512a67828b61819a8", - "sha256:9130e194f01124975a7bd76b8b65550f1f308610d596ec1992280b656e0c11be", - "sha256:ae53b880a1bca8115f1edccfa6e8800661c71347f9b75ed3f1af8549f080dd6b", - "sha256:b0fa63e4ca21a189698ca764367ae91ccba2274d885563c81205c1ac09f36fa6", - "sha256:c04165d196e16f652d21c2d1403a55fbae8721d74dbca1668fdfec6766687a12", - "sha256:ca0bccd31a216931ada487264ad07fa64a0b5a42b51c80cdebb1d3764b813d3b", - 
"sha256:e4e8b0dfd32e12f6d380b77a916ef15793756b856d8b00453a7d0ed4ba29f9ea" + "sha256:168fd7bd763ebe529aa25a066d3a6b89f4c3f492f6297f881df6942741b95787", + "sha256:4222bbc7b9c47bc3cf6f36f2241862c1512ca7ebac3828267a2e05ef6c47fc54", + "sha256:445e4838b809e99677f5c0982fb9af320f0d91328fb28c8097e5f1173c4df9d6", + "sha256:5b403deb2ad5515e1edb3c0867bccb5b974b461f24283d9219a3a761fd6dacc6", + "sha256:5c1a2023f7d8431daa8d87d32f539bb23e1a009500c37f9eba0ac7b3f20f73eb", + "sha256:5ecddc4c6daf80558154b091db0a9839bb15dbe65d2906a543a73b93fbce4f73", + "sha256:6814991b3e3193db4f9b2417174c6f24b9c0197409d864fa7628583bd2df1f0f", + "sha256:78a8e0ee72a42c3b2f5b9114500a781119995f76fa6c21d4b02c6fb9c21df2cc", + "sha256:7daa615a97d4f28db0f332b710bfe1900b15d0c25841c6d727965e4fd91e09cf", + "sha256:94890b2296f1449baa56aede46627ea7fc8de11c788f9c261ee38c2eb4a2cc7d", + "sha256:9f9b829480c12fc834549306821e5c51cb28b216ca5f88c5b2cfedbeb9daf67d", + "sha256:c2d8064217c326dd9a0dcbae294ffe8557263e2a00d76101ffa222b9c9d9c62d", + "sha256:d717b5dea407aac89a646908e7d9849105abab9c88a539c120518c200f899f4e" ], "markers": "python_version >= '3.6'", - "version": "==0.7.3" + "version": "==0.7.4" }, "boltons": { "hashes": [ @@ -60,10 +60,10 @@ }, "certifi": { "hashes": [ - "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", - "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2020.11.8" + "version": "==2020.12.5" }, "chardet": { "hashes": [ @@ -122,21 +122,37 @@ }, "cymem": { "hashes": [ - "sha256:02c3994ee57003dd57d9a34417beb9597e51176e9378a757344347b72510ce8d", - "sha256:0f169ee07f30c2674ca811693cb13857e236dd7f92b4bc47b0f2bb201e27cc93", - "sha256:2baabeb408e4aaa0f51d7f6d8c53dd05f56decf34fb3f1f3180abe8755815890", - "sha256:2d40dfa4b4edf64541192966c67c2706e5058a9c6ce0bdc356e4d6ef1935c9ae", - 
"sha256:45b01ee36ce15aab6df041b4128de38016e3934274b934c44fec43de5100b64c", - "sha256:58f49a51bd8b76b260f38f8274ec2e4613c328423e0b543a503834de32dfa3c4", - "sha256:60b5d969f305a7598d86f34c93c37eb52b3388681e70b98081864f0f185da768", - "sha256:65e5759179b311b8350b82b29744324366e270f96095b16a8e249c73c3f9273f", - "sha256:7bb5791fde8dffa02005f158f5c98d64962adaf8501dac7a9b8156b8767c9a34", - "sha256:a61684d01a1413049e9874e073c9577d406d866eabe0d167c5ee15bbeae090a6", - "sha256:a61a4702dcc50ebd31fb86a90d1aba5526245af592d756c39c63d58d53360afc", - "sha256:ae283889f6f0036fb34d12358fe052d4ea9f5724f05177317f33f302f304b47c", - "sha256:f8f280f0aef46d32655b1b24ff049bea83b63365604b6c2decb112b44ed851fe" - ], - "version": "==2.0.4" + "sha256:01d3ea159f7a3f3192b1e800ed8207dac7586794d903a153198b9ea317f144bc", + "sha256:190e15d9cf2c3bde60ae37bddbae6568a36044dc4a326d84081a5fa08818eee0", + "sha256:1f0eb9b3d03623dcfc746cf8bff0663b0e347f4aea759965c8932087a0307ee9", + "sha256:3d48902d7441645835fefc7832df49feb5362c7300d182475b63a01d25ae44ef", + "sha256:4bd023c2477198b39b660c2a6b0242880649765ecee8461688a57fd4afd2bfc0", + "sha256:734d82d0d03c2ceb929bc1744c04dbe0a105e68a4947c8406056a36f86c41830", + "sha256:8ea57e6923f40eb51012352161bb5707c14a5a5ce901ff72021e59df06221655", + "sha256:9d72d69f7a62a280199c3aa7bc550685c47d6d0689b2d299e6492253b86d2437", + "sha256:a440d63577fcdc9c528c9cc026b7b4f8648193bac462bc0596c9eac10f9fba62", + "sha256:ce1e81c1d031f56b67bac2136e73b4512cbc794706cd570178972d54ba6115d8", + "sha256:d19f68b90411e02ab33b1654118337f96f41c13a3cd00c4f44f7abed2bc712e7", + "sha256:d307f7f6230d861a938837cae4b855226b6845a21c010242a15e9ce6853856cd", + "sha256:f2167c9959fcd639b95d51fa5efaa7c61eef8d686cb75a25412a914f428ce980" + ], + "version": "==2.0.5" + }, + "elasticsearch": { + "hashes": [ + "sha256:4ebd34fd223b31c99d9f3b6b6236d3ac18b3046191a37231e8235b06ae7db955", + "sha256:a725dd923d349ca0652cf95d6ce23d952e2153740cf4ab6daf4a2d804feeed48" + ], + "index": "pypi", + "version": "==7.10.1" + }, + 
"elasticsearch-dsl": { + "hashes": [ + "sha256:0ed75f6ff037e36b2397a8e92cae0ddde79b83adc70a154b8946064cb62f7301", + "sha256:9390d8e5cf82ebad3505e7f656e407259cf703f5a4035f211cef454127672572" + ], + "index": "pypi", + "version": "==7.3.0" }, "face": { "hashes": [ @@ -145,6 +161,14 @@ ], "version": "==20.1.1" }, + "fatcat-openapi-client": { + "hashes": [ + "sha256:2dfec611d1eb8396ba8812ea3c6dfdc250b098870aafcecf895ae31fa2a90c5f", + "sha256:fd7f3089d3cc223c1b560125b688e8ce0393ba76edfe87506464db7a1e38d0b1" + ], + "markers": "python_full_version >= '3.5.0'", + "version": "==0.3.2" + }, "ftfy": { "hashes": [ "sha256:51c7767f8c4b47d291fcef30b9625fb5341c06a31e6a3b627039c706c42f3720" @@ -196,33 +220,33 @@ }, "joblib": { "hashes": [ - "sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72", - "sha256:9e284edd6be6b71883a63c9b7f124738a3c16195513ad940eae7e3438de885d5" + "sha256:75ead23f13484a2a414874779d69ade40d4fa1abe62b222a23cd50d4bc822f6f", + "sha256:7ad866067ac1fdec27d51c8678ea760601b70e32ff1881d4dc8e1171f2b64b24" ], "markers": "python_version >= '3.6'", - "version": "==0.17.0" + "version": "==1.0.0" }, "murmurhash": { "hashes": [ - "sha256:01f1aefaeaa8abf5ad102064bdbb7c1a2542240b105c0a13ee0cc81aea3dc081", - "sha256:2290e509ecbb8e79903e7859d9ec57b26f2019a8ffcaefab56862aa05f09e481", - "sha256:3736e65a926e4093e2adb9397394fb01b193b37b93b8a3eb16053401dea36840", - "sha256:40172b79dcccbf36f861429102df4bdb16e22dc14aecfe8c83fd414d92787516", - "sha256:422084ac1fe994cb7c893689c600923dee4e2c3fc74e832f7d9a8d6fdcc362d5", - "sha256:64bdc91f13cbe24f5f631e82d3c1a8f621ee7a8ce4965cb4a50a29ab6363fa95", - "sha256:7cfeaf95bc2e9192b9f4d44a71ad117462578cb021c441fc8938cc809e518a0f", - "sha256:81998ffd671164e9c4bc48cba61d912c1cb6555226177985045b8c9845772817", - "sha256:9086d0589b6afe24601c1d3e8eb55d9705fdb3d86c24fdfd97a85a3063274edf", - "sha256:92145fb1f3ae037c7a3b192ef1beca69c66389fca1e48ca35da56cae899a6e9f", - 
"sha256:9ae4a4e27cbfbc777cbf88ce863c8dc4ee3d9a6081a9a6a579592e25ebd874a5", - "sha256:aebeb840c68af1998cb94dcae213fed3f62041cf21e9adfe41288a482a2a8f35", - "sha256:bf8edbbad46a27fd32f44a7dec287cc012ce14c36eafba2e6c46564bfc3bfbb5", - "sha256:bfc0c9cf921045e001fe4fab32c77cf8a8c4f94fb1445d348df7d7ec862bb3ee", - "sha256:c580e2df61bc9bdcd709edb01139070218071ae4768f4f7e81f6df773e1578aa", - "sha256:ce523418041baac542aaf48612d6fb47be3e14dffdc1273ce32197a1be565369", - "sha256:de421d1e615d7c6560b3bfa447487bb6cd413ec94a7430b8a5c465abea795f8a" - ], - "version": "==1.0.4" + "sha256:023391cfefe584ac544c1ea0936976c0119b17dd27bb8280652cef1704f76428", + "sha256:23c56182822a1ed88e2a098ac56958dfec380696a9a943df203b9b41e4bcf5e4", + "sha256:76251513a2acad6c2e4b7aeffc5fcb807ee97a66cad5c2990557556555a6b7e9", + "sha256:81474a45c4074637a6dfc8fea4cdebf091ab5aa781c2cfcb94c43b16030badd7", + "sha256:8381172e03c5f6f947005fb146a53c5e5a9e0d630be4a40cbf8838e9324bfe1c", + "sha256:892749023da26420d194f37bfa30df1368aaac0149cfa3b2105db36b66549e37", + "sha256:98ec9d727bd998a35385abd56b062cf0cca216725ea7ec5068604ab566f7e97f", + "sha256:99e55488476a5f70e8d305fd31258f140e52f724f788bcc50c31ec846a2b3766", + "sha256:a9bd2312996e6e47605af305a1e5f091eba1bdd637cdd9986aec4885cb4c5530", + "sha256:add366944eb8ec73013a4f36e166c5a4f0f7628ffe1746bc5fe031347489e5e8", + "sha256:b9292c532538cf47846ca81056cfeab08b877c35fe7521d6524aa92ddcd833e2", + "sha256:d4c3a0242014cf4c84e9ea0ba3f13b48f02a3992de3da7b1116d11b816451195", + "sha256:d58315961dc5a5e740f41f2ac5c3a0ebc61ef472f8afeb4db7eeb3b863243105", + "sha256:ef8819d15973e0d6f69688bafc097a1fae081675c1de39807028869a1320b1a9", + "sha256:f00321998f0a6bad3fd068babf448a296d4b0b1f4dd424cab863ebe5ed54182f", + "sha256:fd17973fd4554715efd8d86b3e9200358e49e437fdb92a897ca127aced48b61c", + "sha256:fed7578fbaa6c301f27ed80834c1f7494ea7d335e269e98b9aee477cf0b3b487" + ], + "version": "==1.0.5" }, "nltk": { "hashes": [ @@ -273,11 +297,11 @@ }, "packaging": { "hashes": [ - 
"sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", - "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" + "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858", + "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.4" + "version": "==20.8" }, "plac": { "hashes": [ @@ -296,61 +320,61 @@ }, "preshed": { "hashes": [ - "sha256:13a779205d55ce323976ac06df597f9ec2d6f0563ebcf5652176cf4520c7d540", - "sha256:13de7525696fb7b1ab5d0dabca3cef49e17a49e662b9f651549ae2d953c3ad35", - "sha256:15beb09341daab611fa3db79db7f0bcc7b2efdc5a99f417a6903e3d9d68a92db", - "sha256:1df1b4452cbf4b8a36475e3025d2cfc3d44d1e1c5305163e119442a331b173ae", - "sha256:496edf5de0065481272914f02173d80dc80d6dccf2372af67d8d62b415ac3aa9", - "sha256:4e71b88ae027bd23d40f4f7b3e73d93479346abfec78cadea312d11d610e90f9", - "sha256:606516fd7166ac42406d1b54941aa2b443f4fa1eee11edfa37d9646ac36010a1", - "sha256:6dd33a8d88cbe77ef5592366891c491462225b7cb66499e35d63e4f9900e16ff", - "sha256:717b76ebbda8dd458f0f345f514fb9b62a1507352555a00b9332eb54cc4f35d4", - "sha256:8604bd028e94539061441cef926293efcdd9fd0b7a1f5bc4ac3cdb6400c83a9b", - "sha256:9c04c80314423e18fcfb1a7198c98bb8885d3b843546561be96cba45d8d17ad1", - "sha256:a11cb04a47cc1ff08765a8cd2ee1849e49970dda85c73c2a72f7e130ba6073ba", - "sha256:c777b7603755e9cca67067d4382bde8ba01221e3f6b06f65ba832fc6135258d9", - "sha256:cbf3d38278fb6d20dd67e624651e296d0c734c4107f1c1a67c97ee578ce6e813", - "sha256:dbbf660802ce87bcff683e89297dbf52a83ea260b94ec56ef1c2996bf967ad1a", - "sha256:eb4cd05415ce15a483eb2526304df4f98a17aea6ba80564fcdbf55f718f661a0", - "sha256:f415f994f422125d7c5a4718a3e4f57f0dfe0bde719799c5128761ab90b1f3b6" - ], - "version": "==3.0.4" + "sha256:12cbe1e378b4f1c6b06f5e4130408befe916e55ea1616e6aa63c5cd0ccd9c927", + 
"sha256:1bdededa7fd81f26a42bc9d11d542657c74746b7ea7fc2b2ca6d0ddbf1f93792", + "sha256:1ce0846cb7ebb2ea913d44ec2e296098c285443ecdea80ddf02656bbef4deacb", + "sha256:30f0c8ea85113d0565a1e3eb6222d00513ec39b56f3f9a2615e304575e65422e", + "sha256:56b9603517bb2a364418163236d6a147a1d722ff7546cbe085e76e25ae118e89", + "sha256:572899224578d30f6a67fadecb3d62b824866b4d2b6bad73f71abf7585db1389", + "sha256:5e06a49477bd257eea02bf823b5d3e201d00a19d6976523a58da8606b2358481", + "sha256:67c11e384ce4c008bc487ba3a29bafdfe038b9a2546ccfe0fe2160480b356fed", + "sha256:6e833f1632a1d0232bdc6df6c3542fb130ef044d8656b24576d9fd19e5f1e0d1", + "sha256:6f126bcc414a0304b54956f9dac2628a0f9bef1657d1b3a3837fc82b791aa2a1", + "sha256:85074eebf90a858a6b68242f1ae265ca99e1af45bf9dafcb9a83d49b0815a2e1", + "sha256:8a3adffde3126c2a0ab7d57cab1d605cb5f63da1ba88088ad3cf8debfd9aa4dc", + "sha256:8a560850b8c53c1487ba51c2b0f5769535512b36d3b129ad5796b64653abe2f9", + "sha256:9ebf444f8487782c84d7b5acb1d7195e603155882fafc4697344199eeeafbe5f", + "sha256:c6d3dba39ed5059aaf99767017b9568c75b2d0780c3481e204b1daecde00360e", + "sha256:ca4a7681b643b8356e7dfdab9cf668b2b34bd07ef4b09ebed44c8aeb3b1626ee", + "sha256:fb4d2e82add82d63b2c97802b759a58ff200d06b632e2edc48a9ced1e6472faf" + ], + "version": "==3.0.5" }, "py": { "hashes": [ - "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2", - "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342" + "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", + "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.9.0" + "version": "==1.10.0" }, "pydantic": { "hashes": [ - "sha256:01f0291f4951580f320f7ae3f2ecaf0044cdebcc9b45c5f882a7e84453362420", - "sha256:0fe8b45d31ae53d74a6aa0bf801587bd49970070eac6a6326f9fa2a302703b8a", - "sha256:2182ba2a9290964b278bcc07a8d24207de709125d520efec9ad6fa6f92ee058d", - 
"sha256:2c1673633ad1eea78b1c5c420a47cd48717d2ef214c8230d96ca2591e9e00958", - "sha256:388c0c26c574ff49bad7d0fd6ed82fbccd86a0473fa3900397d3354c533d6ebb", - "sha256:4ba6b903e1b7bd3eb5df0e78d7364b7e831ed8b4cd781ebc3c4f1077fbcb72a4", - "sha256:6665f7ab7fbbf4d3c1040925ff4d42d7549a8c15fe041164adfe4fc2134d4cce", - "sha256:95d4410c4e429480c736bba0db6cce5aaa311304aea685ebcf9ee47571bfd7c8", - "sha256:a2fc7bf77ed4a7a961d7684afe177ff59971828141e608f142e4af858e07dddc", - "sha256:a3c274c49930dc047a75ecc865e435f3df89715c775db75ddb0186804d9b04d0", - "sha256:ab1d5e4d8de00575957e1c982b951bffaedd3204ddd24694e3baca3332e53a23", - "sha256:b11fc9530bf0698c8014b2bdb3bbc50243e82a7fa2577c8cfba660bcc819e768", - "sha256:b9572c0db13c8658b4a4cb705dcaae6983aeb9842248b36761b3fbc9010b740f", - "sha256:c68b5edf4da53c98bb1ccb556ae8f655575cb2e676aef066c12b08c724a3f1a1", - "sha256:c8200aecbd1fb914e1bd061d71a4d1d79ecb553165296af0c14989b89e90d09b", - "sha256:c9760d1556ec59ff745f88269a8f357e2b7afc75c556b3a87b8dda5bc62da8ba", - "sha256:ce2d452961352ba229fe1e0b925b41c0c37128f08dddb788d0fd73fd87ea0f66", - "sha256:dfaa6ed1d509b5aef4142084206584280bb6e9014f01df931ec6febdad5b200a", - "sha256:e5fece30e80087d9b7986104e2ac150647ec1658c4789c89893b03b100ca3164", - "sha256:f045cf7afb3352a03bc6cb993578a34560ac24c5d004fa33c76efec6ada1361a", - "sha256:f83f679e727742b0c465e7ef992d6da4a7e5268b8edd8fdaf5303276374bef52", - "sha256:fc21a37ff3f545de80b166e1735c4172b41b017948a3fb2d5e2f03c219eac50a" + "sha256:025bf13ce27990acc059d0c5be46f416fc9b293f45363b3d19855165fee1874f", + "sha256:185e18134bec5ef43351149fe34fda4758e53d05bb8ea4d5928f0720997b79ef", + "sha256:213125b7e9e64713d16d988d10997dabc6a1f73f3991e1ff8e35ebb1409c7dc9", + "sha256:24ca47365be2a5a3cc3f4a26dcc755bcdc9f0036f55dcedbd55663662ba145ec", + "sha256:38be427ea01a78206bcaf9a56f835784afcba9e5b88fbdce33bbbfbcd7841229", + "sha256:475f2fa134cf272d6631072554f845d0630907fce053926ff634cc6bc45bf1af", + "sha256:514b473d264671a5c672dfb28bdfe1bf1afd390f6b206aa2ec9fed7fc592c48e", 
+ "sha256:59e45f3b694b05a69032a0d603c32d453a23f0de80844fb14d55ab0c6c78ff2f", + "sha256:5b24e8a572e4b4c18f614004dda8c9f2c07328cb5b6e314d6e1bbd536cb1a6c1", + "sha256:6e3874aa7e8babd37b40c4504e3a94cc2023696ced5a0500949f3347664ff8e2", + "sha256:8d72e814c7821125b16f1553124d12faba88e85405b0864328899aceaad7282b", + "sha256:a4143c8d0c456a093387b96e0f5ee941a950992904d88bc816b4f0e72c9a0009", + "sha256:b2b054d095b6431cdda2f852a6d2f0fdec77686b305c57961b4c5dd6d863bf3c", + "sha256:c59ea046aea25be14dc22d69c97bee629e6d48d2b2ecb724d7fe8806bf5f61cd", + "sha256:d1fe3f0df8ac0f3a9792666c69a7cd70530f329036426d06b4f899c025aca74e", + "sha256:d8df4b9090b595511906fa48deda47af04e7d092318bfb291f4d45dfb6bb2127", + "sha256:dba5c1f0a3aeea5083e75db9660935da90216f8a81b6d68e67f54e135ed5eb23", + "sha256:e682f6442ebe4e50cb5e1cfde7dda6766fb586631c3e5569f6aa1951fd1a76ef", + "sha256:ecb54491f98544c12c66ff3d15e701612fc388161fd455242447083350904730", + "sha256:f5b06f5099e163295b8ff5b1b71132ecf5866cc6e7f586d78d7d3fd6e8084608", + "sha256:f6864844b039805add62ebe8a8c676286340ba0c6d043ae5dea24114b82a319e", + "sha256:ffd180ebd5dd2a9ac0da4e8b995c9c99e7c74c31f985ba090ee01d681b1c4b95" ], "markers": "python_version >= '3.6'", - "version": "==1.7.2" + "version": "==1.7.3" }, "pyparsing": { "hashes": [ @@ -362,11 +386,11 @@ }, "pytest": { "hashes": [ - "sha256:4288fed0d9153d9646bfcdf0c0428197dba1ecb27a33bb6e031d002fa88653fe", - "sha256:c0a7e94a8cdbc5422a51ccdad8e6f1024795939cc89159a0ae7f0b316ad3823e" + "sha256:b12e09409c5bdedc28d308469e156127004a436b41e9b44f9bff6446cbab9152", + "sha256:d69e1a80b34fe4d596c9142f35d9e523d98a2838976f1a68419a8f051b24cec6" ], - "markers": "python_version >= '3.5'", - "version": "==6.1.2" + "markers": "python_version >= '3.6'", + "version": "==6.2.0" }, "pytest-cov": { "hashes": [ @@ -376,6 +400,14 @@ "index": "pypi", "version": "==2.10.1" }, + "python-dateutil": { + "hashes": [ + "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", + 
"sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.1" + }, "regex": { "hashes": [ "sha256:02951b7dacb123d8ea6da44fe45ddd084aa6777d4b2454fa0da61d569c6fa538", @@ -441,58 +473,58 @@ }, "spacy": { "hashes": [ - "sha256:0bb16dcbd34d602ae2a47a28f721c581039ff75c25fb851331f72dc56af9920d", - "sha256:0ecbebc676113614feee1f585541d018cc1e3a16be9369496f502f72a2cd7eca", - "sha256:1a58c5f99feee4324b9fbf060f7518715821ca9bde0da3d3b457aea834ac5521", - "sha256:1be8582246c3ad9c706f2e660c057ef2219e32c5f65514ef91940a3a91e70119", - "sha256:2d2f2abd6c58aeb499cd66438ffadbde1bd1534b1834e4f220032bc504735c96", - "sha256:64ebb8a5621e138d2f7817a0d7cd6245f2eb5c3b59500d34cb317b9cbe9649ec", - "sha256:a2b06e532d72e50c2fb2249dc689af9a5462439019b039314d898616d6832c07", - "sha256:a40eafd971ca537b4a224e54c51936c1a114ff4ed11e884af3b0e8bd86ee94b3", - "sha256:a5c8805759114aac3a1db1b20f42af1124da5315be903ccb4c472cc8452393fb", - "sha256:acb18555935cc913ca92e5561358f097289a4a76af915ee22073ed6e89fe2bd3", - "sha256:bf257550383bfc0c2fd54b38386f6b970dc45dd2a570a72fcace340fe7579b3c", - "sha256:c5d09b613f65f2024e30dab7ef9a3f69195bb221443ac573e75dad17939c61ba", - "sha256:e49b22e2f3ab35a2c18cf28932ba258b2c5168d681e92e35a7c18131100301a9" + "sha256:118a92582b1054b5de7bc5ed763f47ee89388847ede1e0597c6df4b509643e14", + "sha256:14bb12de0d03beb2d8309f194154db70fb364a0fae727e864c2b0228bf3438d8", + "sha256:315278ab60094643baecd866017c7d4cbd966efd2d517ad0e6c888edf7fa5aef", + "sha256:3e9496f5ea3d08f2b9fc3e326c2c8cc7886df0db982a41dca2521d3f22ca043e", + "sha256:45497775e986d2790c7ee3625c565e3ef7e9ffa607d50230aa3382dd6d9b26e7", + "sha256:49f7818bd8a597887013fdaaea3263d8b6e99ca64db0933c32f0896158898209", + "sha256:4b7c0c8ab94c6433f08633fef415a054d1f3345b205bcb064578c79f35192917", + "sha256:4e2e79ab7c2af2af8a91913d6d096dd2e6a5a422142cfb35b30c574f776b9fd7", + 
"sha256:c7b3d7928d047e5abcd591f8cf6a1c508da16423d371b8a21332101cab46ff7c", + "sha256:cecb9987a875620d0f185ff07dd04cd64d5097de48689e506256a27a46a644a1", + "sha256:ec9eebfae2a35e464d1c35aa2109422765967ba5b10fa9f11da8873801d2241a", + "sha256:f153d8aa6104694389ef85c578ac1a3900b142f108248c7b9f5790d010fbe4ee", + "sha256:faa728e56f7b8fe0a70c4bedc42611da23de86b783f6ad588a92c115f427b90c" ], "index": "pypi", - "version": "==2.3.4" + "version": "==2.3.5" }, "srsly": { "hashes": [ - "sha256:02acfa09769380023e9bbed5c63f5f41b04dbc13ff392a7284df685127d224ae", - "sha256:2ae6327e013934126f2b7082ce36284785eed951635ac99c73e39de75ecd1345", - "sha256:2b9e2b7c00e522d6e54952564d702e72bc4729cc828e27a01b4dac70b870d44a", - "sha256:39a8bd6b383b4ea5c3a6793d790e1356e236701f421badef278a0ea80bc95d30", - "sha256:3f46e5efbbb0e857f302394257539aae4487c201cdafc3519db4bde897341fee", - "sha256:4b8bd311fa20fa5d1a0deac0dd8f3c43c7ca23df2c483a7102d96acf2cddb506", - "sha256:660b80ffa3ea4ef9db254e5a22d2f52294f15342910c3274844a8f5a81369cbc", - "sha256:713766bf4289ebe6f21d1e5203b6b75f71fd4512e877dbe5a3f660b4a51d77ac", - "sha256:9ca5633a5303ce0d0b84d1bdb6d029f665ba2b7d320f5482525b125ddfb8a390", - "sha256:b4fb0ff57e25a99e9ac9a61b262478efe8493b3cff0f50812c09534c76688b0b", - "sha256:c1fa001bbd87a771998c8e2487991fc0dfe16edc09e3dec0d64f9472832f89bd", - "sha256:deee259837443ea8a4c194967f56eb62809c3cf3a11cd9d4f425de4aea5ecb81", - "sha256:f7aee40cd66f190e7694d7e7c83be018b1a7b9546a305db52c8793d93b1d0a7e" - ], - "version": "==1.0.4" + "sha256:11447f8e659e1f62f29302252fb057f179031457b36c83426027182f624fe565", + "sha256:23c7205b8c1cac49a03521bee37f0afe3680d9f0ec18c75ab3ac39bd3e15272b", + "sha256:2615b8713dfe793ca57925076b0869385d56754816b1eaee5490a6827a1cb5c7", + "sha256:334f29435099e644a8047b63d60b8386a98b5f7b4739f7efc86b46ca0200aa0e", + "sha256:4c43a1f28e555891a1e65650adea2c5d0f0fe4b3d63821de65c8357f32c3a11c", + "sha256:779ebfaa3cf1d5c0f1286ac1baf06af5f2a17bb103622992c71acc6ac20b2781", + 
"sha256:8fc4c0641537262e15c7b5b57edc47487b15ac47b696adcb81e0a770ef78e8f5", + "sha256:a1449da4195e30a3bd1fd3122e5b1a0c57703843c590643555c412fc87132aa0", + "sha256:a2746afccfd4f51f0793cccc2b6d5e8a564c962870feec5c77408244c1dbb3c5", + "sha256:a696e9c925e91f76ec53840c55483a4fbf76cb717424410a4f249d4805439038", + "sha256:b5b887328ac6e210842560fcf32a29c2a9c1ed38c6d47479cadc03d81940da8c", + "sha256:d3dd796372367c71946d0cd6f734e49db3d99dd13a57bdac937d9eb62689fc9e", + "sha256:fd5e1e01f5fd0f532a6f3977bb74facc42f1b7155402ee3d06c07a73e83e3c47" + ], + "version": "==1.0.5" }, "thinc": { "hashes": [ - "sha256:16002dc0feb941d4fc985cee90a0af8081190d098247d4e7f6db7d3da1ae0222", - "sha256:187b8245124305b0a709a7f7c79775742c270d6c176a3879afecdaadcfb92fb4", - "sha256:1e516f78d63ab710fcea49f0eed1c8068f2ca29ce5c6323a2a23fb8140fceb45", - "sha256:241b8c9e9e068d1728e1fb97d95bfc4a956a8eebb40c63ecbe68aad416dac26c", - "sha256:2ea3fd503974078a24a9f3d07c49d2adf4f0252530780eda38de3bf6517237e2", - "sha256:312bbb18f668e190b14563c2d38928d187faaef69217ea5ff4d7e0d6e2e1f21c", - "sha256:39405be25a5d232e4fb6e1d4830568bd826867d43878d093497c9db277f3e90f", - "sha256:63471847f282ca2f2b2f88d3e6fb15240f4ea0ec6931549ddceb57967deeca18", - "sha256:862e846033a31644be600c7f9606a965e913bcc42a45cad64a0c77c81c677bdf", - "sha256:9e4772c1094b2997394fe95377eb76e15c2a2f50cdb46e2351442a8a9c370146", - "sha256:c98491b083165f48bda95f5533f7d9dbd3980d32ad621bfe579ff08ef625a4d3", - "sha256:d28d5b300f7e18fa143b3efba0c9321e886fa6ec36a553fac5b32f686df6584a", - "sha256:e609261414e6978692c17b9ae091925edf3361b47c8f8c086390684ce604805f" - ], - "version": "==7.4.3" + "sha256:02b71ae5a0fa906a0aca968bd65589e0ab9fabd511e57be839774228b1509224", + "sha256:10bafe5ddce698180098345b9c55f762dc3456558be844d35d64175e511581b6", + "sha256:24086aa0fb72f466782115d529574a825c89afa62eb817962b9339f61ab50e0d", + "sha256:29a47ad0289dda0520b5af8538b30e8134553130200b83c34311feb71739968d", + 
"sha256:309ec4cae81f4de2e4e4fbd0bcb52b10bef4b1a6352c6a9143f6a53d3b1060ef", + "sha256:5743fde41706252ec6ce4737c68d3505f7e1cc3d4431174a17149838d594f8cb", + "sha256:5774007b5c52501cab5e2970cadca84923b4c420fff06172f2d0c86531973ce8", + "sha256:8b647de79fe5f98cd327983bf0e27d006b48ad9694ceabdb9a3832b614ed1618", + "sha256:c408ab24b24e6368ce4b6ddebb579118042a22d3f2f2c4e19ca67e3eadc9ed33", + "sha256:c43ed753aa70bc619e42e168be4926c8a47799af6121ff0727ba99b330afbb44", + "sha256:cce68c5ea54cd32cef661858363509afdedad047027e8cdf0dc4edec0c2cc010", + "sha256:d01ab1480d37ebefcac22d63ffe01916c9f025ae3dbdbe5824ac3ea5cce8e3fd", + "sha256:fae320de65af70786c1526ffc33b88f2da650d3106f5f9a06b37f0ac3944a44f" + ], + "version": "==7.4.5" }, "tokenizers": { "hashes": [ @@ -551,11 +583,11 @@ }, "tqdm": { "hashes": [ - "sha256:3d3f1470d26642e88bd3f73353cb6ff4c51ef7d5d7efef763238f4bc1f7e4e81", - "sha256:5ff3f5232b19fa4c5531641e480b7fad4598819f708a32eb815e6ea41c5fa313" + "sha256:38b658a3e4ecf9b4f6f8ff75ca16221ae3378b2e175d846b6b33ea3a20852cf5", + "sha256:d4f413aecb61c9779888c64ddf0c62910ad56dcbe857d8922bb505d4dbff0df1" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==4.53.0" + "version": "==4.54.1" }, "unidecode": { "hashes": [ @@ -621,10 +653,10 @@ }, "certifi": { "hashes": [ - "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", - "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2020.11.8" + "version": "==2020.12.5" }, "cffi": { "hashes": [ @@ -649,12 +681,14 @@ "sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35", "sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26", "sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b", + 
"sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01", "sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb", "sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293", "sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd", "sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d", "sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3", "sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d", + "sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e", "sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca", "sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d", "sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775", @@ -682,31 +716,23 @@ }, "cryptography": { "hashes": [ - "sha256:07ca431b788249af92764e3be9a488aa1d39a0bc3be313d826bbec690417e538", - "sha256:13b88a0bd044b4eae1ef40e265d006e34dbcde0c2f1e15eb9896501b2d8f6c6f", - "sha256:32434673d8505b42c0de4de86da8c1620651abd24afe91ae0335597683ed1b77", - "sha256:3cd75a683b15576cfc822c7c5742b3276e50b21a06672dc3a800a2d5da4ecd1b", - "sha256:4e7268a0ca14536fecfdf2b00297d4e407da904718658c1ff1961c713f90fd33", - "sha256:545a8550782dda68f8cdc75a6e3bf252017aa8f75f19f5a9ca940772fc0cb56e", - "sha256:55d0b896631412b6f0c7de56e12eb3e261ac347fbaa5d5e705291a9016e5f8cb", - "sha256:5849d59358547bf789ee7e0d7a9036b2d29e9a4ddf1ce5e06bb45634f995c53e", - "sha256:6dc59630ecce8c1f558277ceb212c751d6730bd12c80ea96b4ac65637c4f55e7", - "sha256:7117319b44ed1842c617d0a452383a5a052ec6aa726dfbaffa8b94c910444297", - "sha256:75e8e6684cf0034f6bf2a97095cb95f81537b12b36a8fedf06e73050bb171c2d", - "sha256:7b8d9d8d3a9bd240f453342981f765346c87ade811519f98664519696f8e6ab7", - "sha256:a035a10686532b0587d58a606004aa20ad895c60c4d029afa245802347fab57b", - "sha256:a4e27ed0b2504195f855b52052eadcc9795c59909c9d84314c5408687f933fc7", - 
"sha256:a733671100cd26d816eed39507e585c156e4498293a907029969234e5e634bc4", - "sha256:a75f306a16d9f9afebfbedc41c8c2351d8e61e818ba6b4c40815e2b5740bb6b8", - "sha256:bd717aa029217b8ef94a7d21632a3bb5a4e7218a4513d2521c2a2fd63011e98b", - "sha256:d25cecbac20713a7c3bc544372d42d8eafa89799f492a43b79e1dfd650484851", - "sha256:d26a2557d8f9122f9bf445fc7034242f4375bd4e95ecda007667540270965b13", - "sha256:d3545829ab42a66b84a9aaabf216a4dce7f16dbc76eb69be5c302ed6b8f4a29b", - "sha256:d3d5e10be0cf2a12214ddee45c6bd203dab435e3d83b4560c03066eda600bfe3", - "sha256:efe15aca4f64f3a7ea0c09c87826490e50ed166ce67368a68f315ea0807a20df" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==3.2.1" + "sha256:0003a52a123602e1acee177dc90dd201f9bb1e73f24a070db7d36c588e8f5c7d", + "sha256:0e85aaae861d0485eb5a79d33226dd6248d2a9f133b81532c8f5aae37de10ff7", + "sha256:594a1db4511bc4d960571536abe21b4e5c3003e8750ab8365fafce71c5d86901", + "sha256:69e836c9e5ff4373ce6d3ab311c1a2eed274793083858d3cd4c7d12ce20d5f9c", + "sha256:788a3c9942df5e4371c199d10383f44a105d67d401fb4304178020142f020244", + "sha256:7e177e4bea2de937a584b13645cab32f25e3d96fc0bc4a4cf99c27dc77682be6", + "sha256:83d9d2dfec70364a74f4e7c70ad04d3ca2e6a08b703606993407bf46b97868c5", + "sha256:84ef7a0c10c24a7773163f917f1cb6b4444597efd505a8aed0a22e8c4780f27e", + "sha256:9e21301f7a1e7c03dbea73e8602905a4ebba641547a462b26dd03451e5769e7c", + "sha256:9f6b0492d111b43de5f70052e24c1f0951cb9e6022188ebcb1cc3a3d301469b0", + "sha256:a69bd3c68b98298f490e84519b954335154917eaab52cf582fa2c5c7efc6e812", + "sha256:b4890d5fb9b7a23e3bf8abf5a8a7da8e228f1e97dc96b30b95685df840b6914a", + "sha256:c366df0401d1ec4e548bebe8f91d55ebcc0ec3137900d214dd7aac8427ef3030", + "sha256:dc42f645f8f3a489c3dd416730a514e7a91a59510ddaadc09d04224c098d3302" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==3.3.1" }, "decorator": { "hashes": [ @@ -723,6 +749,30 @@ "markers": 
"python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==0.16" }, + "elasticsearch": { + "hashes": [ + "sha256:4ebd34fd223b31c99d9f3b6b6236d3ac18b3046191a37231e8235b06ae7db955", + "sha256:a725dd923d349ca0652cf95d6ce23d952e2153740cf4ab6daf4a2d804feeed48" + ], + "index": "pypi", + "version": "==7.10.1" + }, + "elasticsearch-dsl": { + "hashes": [ + "sha256:0ed75f6ff037e36b2397a8e92cae0ddde79b83adc70a154b8946064cb62f7301", + "sha256:9390d8e5cf82ebad3505e7f656e407259cf703f5a4035f211cef454127672572" + ], + "index": "pypi", + "version": "==7.3.0" + }, + "fatcat-openapi-client": { + "hashes": [ + "sha256:2dfec611d1eb8396ba8812ea3c6dfdc250b098870aafcecf895ae31fa2a90c5f", + "sha256:fd7f3089d3cc223c1b560125b688e8ce0393ba76edfe87506464db7a1e38d0b1" + ], + "markers": "python_full_version >= '3.5.0'", + "version": "==0.3.2" + }, "ftfy": { "hashes": [ "sha256:51c7767f8c4b47d291fcef30b9625fb5341c06a31e6a3b627039c706c42f3720" @@ -840,11 +890,11 @@ }, "packaging": { "hashes": [ - "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", - "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" + "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858", + "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.4" + "version": "==20.8" }, "parso": { "hashes": [ @@ -901,11 +951,11 @@ }, "py": { "hashes": [ - "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2", - "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342" + "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", + "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.9.0" + "version": "==1.10.0" }, "pycparser": { "hashes": [ @@ 
-917,39 +967,39 @@ }, "pydantic": { "hashes": [ - "sha256:01f0291f4951580f320f7ae3f2ecaf0044cdebcc9b45c5f882a7e84453362420", - "sha256:0fe8b45d31ae53d74a6aa0bf801587bd49970070eac6a6326f9fa2a302703b8a", - "sha256:2182ba2a9290964b278bcc07a8d24207de709125d520efec9ad6fa6f92ee058d", - "sha256:2c1673633ad1eea78b1c5c420a47cd48717d2ef214c8230d96ca2591e9e00958", - "sha256:388c0c26c574ff49bad7d0fd6ed82fbccd86a0473fa3900397d3354c533d6ebb", - "sha256:4ba6b903e1b7bd3eb5df0e78d7364b7e831ed8b4cd781ebc3c4f1077fbcb72a4", - "sha256:6665f7ab7fbbf4d3c1040925ff4d42d7549a8c15fe041164adfe4fc2134d4cce", - "sha256:95d4410c4e429480c736bba0db6cce5aaa311304aea685ebcf9ee47571bfd7c8", - "sha256:a2fc7bf77ed4a7a961d7684afe177ff59971828141e608f142e4af858e07dddc", - "sha256:a3c274c49930dc047a75ecc865e435f3df89715c775db75ddb0186804d9b04d0", - "sha256:ab1d5e4d8de00575957e1c982b951bffaedd3204ddd24694e3baca3332e53a23", - "sha256:b11fc9530bf0698c8014b2bdb3bbc50243e82a7fa2577c8cfba660bcc819e768", - "sha256:b9572c0db13c8658b4a4cb705dcaae6983aeb9842248b36761b3fbc9010b740f", - "sha256:c68b5edf4da53c98bb1ccb556ae8f655575cb2e676aef066c12b08c724a3f1a1", - "sha256:c8200aecbd1fb914e1bd061d71a4d1d79ecb553165296af0c14989b89e90d09b", - "sha256:c9760d1556ec59ff745f88269a8f357e2b7afc75c556b3a87b8dda5bc62da8ba", - "sha256:ce2d452961352ba229fe1e0b925b41c0c37128f08dddb788d0fd73fd87ea0f66", - "sha256:dfaa6ed1d509b5aef4142084206584280bb6e9014f01df931ec6febdad5b200a", - "sha256:e5fece30e80087d9b7986104e2ac150647ec1658c4789c89893b03b100ca3164", - "sha256:f045cf7afb3352a03bc6cb993578a34560ac24c5d004fa33c76efec6ada1361a", - "sha256:f83f679e727742b0c465e7ef992d6da4a7e5268b8edd8fdaf5303276374bef52", - "sha256:fc21a37ff3f545de80b166e1735c4172b41b017948a3fb2d5e2f03c219eac50a" + "sha256:025bf13ce27990acc059d0c5be46f416fc9b293f45363b3d19855165fee1874f", + "sha256:185e18134bec5ef43351149fe34fda4758e53d05bb8ea4d5928f0720997b79ef", + "sha256:213125b7e9e64713d16d988d10997dabc6a1f73f3991e1ff8e35ebb1409c7dc9", + 
"sha256:24ca47365be2a5a3cc3f4a26dcc755bcdc9f0036f55dcedbd55663662ba145ec", + "sha256:38be427ea01a78206bcaf9a56f835784afcba9e5b88fbdce33bbbfbcd7841229", + "sha256:475f2fa134cf272d6631072554f845d0630907fce053926ff634cc6bc45bf1af", + "sha256:514b473d264671a5c672dfb28bdfe1bf1afd390f6b206aa2ec9fed7fc592c48e", + "sha256:59e45f3b694b05a69032a0d603c32d453a23f0de80844fb14d55ab0c6c78ff2f", + "sha256:5b24e8a572e4b4c18f614004dda8c9f2c07328cb5b6e314d6e1bbd536cb1a6c1", + "sha256:6e3874aa7e8babd37b40c4504e3a94cc2023696ced5a0500949f3347664ff8e2", + "sha256:8d72e814c7821125b16f1553124d12faba88e85405b0864328899aceaad7282b", + "sha256:a4143c8d0c456a093387b96e0f5ee941a950992904d88bc816b4f0e72c9a0009", + "sha256:b2b054d095b6431cdda2f852a6d2f0fdec77686b305c57961b4c5dd6d863bf3c", + "sha256:c59ea046aea25be14dc22d69c97bee629e6d48d2b2ecb724d7fe8806bf5f61cd", + "sha256:d1fe3f0df8ac0f3a9792666c69a7cd70530f329036426d06b4f899c025aca74e", + "sha256:d8df4b9090b595511906fa48deda47af04e7d092318bfb291f4d45dfb6bb2127", + "sha256:dba5c1f0a3aeea5083e75db9660935da90216f8a81b6d68e67f54e135ed5eb23", + "sha256:e682f6442ebe4e50cb5e1cfde7dda6766fb586631c3e5569f6aa1951fd1a76ef", + "sha256:ecb54491f98544c12c66ff3d15e701612fc388161fd455242447083350904730", + "sha256:f5b06f5099e163295b8ff5b1b71132ecf5866cc6e7f586d78d7d3fd6e8084608", + "sha256:f6864844b039805add62ebe8a8c676286340ba0c6d043ae5dea24114b82a319e", + "sha256:ffd180ebd5dd2a9ac0da4e8b995c9c99e7c74c31f985ba090ee01d681b1c4b95" ], "markers": "python_version >= '3.6'", - "version": "==1.7.2" + "version": "==1.7.3" }, "pygments": { "hashes": [ - "sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0", - "sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773" + "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716", + "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08" ], "markers": "python_version >= '3.5'", - "version": "==2.7.2" + "version": "==2.7.3" }, "pyparsing": { "hashes": [ 
@@ -961,11 +1011,19 @@
         },
         "pytest": {
             "hashes": [
-                "sha256:4288fed0d9153d9646bfcdf0c0428197dba1ecb27a33bb6e031d002fa88653fe",
-                "sha256:c0a7e94a8cdbc5422a51ccdad8e6f1024795939cc89159a0ae7f0b316ad3823e"
+                "sha256:b12e09409c5bdedc28d308469e156127004a436b41e9b44f9bff6446cbab9152",
+                "sha256:d69e1a80b34fe4d596c9142f35d9e523d98a2838976f1a68419a8f051b24cec6"
             ],
-            "markers": "python_version >= '3.5'",
-            "version": "==6.1.2"
+            "markers": "python_version >= '3.6'",
+            "version": "==6.2.0"
+        },
+        "python-dateutil": {
+            "hashes": [
+                "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
+                "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==2.8.1"
         },
         "pyyaml": {
             "hashes": [
@@ -1041,11 +1099,11 @@
         },
         "tqdm": {
             "hashes": [
-                "sha256:3d3f1470d26642e88bd3f73353cb6ff4c51ef7d5d7efef763238f4bc1f7e4e81",
-                "sha256:5ff3f5232b19fa4c5531641e480b7fad4598819f708a32eb815e6ea41c5fa313"
+                "sha256:38b658a3e4ecf9b4f6f8ff75ca16221ae3378b2e175d846b6b33ea3a20852cf5",
+                "sha256:d4f413aecb61c9779888c64ddf0c62910ad56dcbe857d8922bb505d4dbff0df1"
             ],
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==4.53.0"
+            "version": "==4.54.1"
         },
         "traitlets": {
             "hashes": [
@@ -1104,6 +1162,7 @@
                 "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c",
                 "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"
             ],
+            "markers": "python_version < '3.8'",
             "version": "==3.7.4.3"
         },
         "unidecode": {
diff --git a/fuzzycat/matching.py b/fuzzycat/matching.py
new file mode 100644
index 0000000..518198a
--- /dev/null
+++ b/fuzzycat/matching.py
@@ -0,0 +1,127 @@
+import logging
+from typing import List
+
+import elasticsearch
+import elasticsearch_dsl
+from fatcat_openapi_client import ContainerEntity, ReleaseEntity
+
+logger = logging.getLogger(__name__)
+
+
+def match_release_fuzzy(release: ReleaseEntity, size=5, es=None) -> List[ReleaseEntity]:
+    """
+    Given a release entity, return a number of similar release entities from
+    fatcat using Elasticsearch.
+
+    Queries are tried from most to least specific and the first one that
+    yields hits wins: exact term match on each external identifier, then a
+    title match requiring all terms, then the same title match with fuzziness.
+
+    Args:
+        release: the release to find candidates for.
+        size: maximum number of hits per query; None or 0 means "as many as
+            possible".
+        es: an Elasticsearch client, a host string to connect to, or None to
+            use the client library defaults.
+
+    Returns:
+        A list of matching release entities; empty if nothing was found.
+    """
+    assert isinstance(release, ReleaseEntity)
+
+    if size is None or size == 0:
+        # Elasticsearch rejects from + size beyond index.max_result_window,
+        # which defaults to 10000.
+        size = 10000
+
+    if isinstance(es, str):
+        es = elasticsearch.Elasticsearch([es])
+    if es is None:
+        es = elasticsearch.Elasticsearch()
+
+    # Try to match by external identifier. Maps the attribute name on
+    # release.ext_ids to the field name in the "fatcat_release" index.
+    ext_ids = release.ext_ids
+    attrs = {
+        "doi": "doi",
+        "wikidata_qid": "wikidata_qid",
+        "isbn13": "isbn13",
+        "pmid": "pmid",
+        "pmcid": "pmcid",
+        # NOTE(review): "code_id" looks like a typo for "core_id" -- confirm
+        # against the index mapping before changing it.
+        "core": "code_id",
+        "arxiv": "arxiv_id",
+        "jstor": "jstor_id",
+        "ark": "ark_id",
+        "mag": "mag_id",
+    }
+    for attr, es_field in attrs.items():
+        # ext_ids may be unset on a sparse entity; treat that as "no ids".
+        value = getattr(ext_ids, attr, None)
+        if not value:
+            continue
+        s = (
+            elasticsearch_dsl.Search(using=es, index="fatcat_release")
+            .query("term", **{es_field: value})
+            .extra(size=size)
+        )
+        logger.debug("release query: %s", s.to_dict())
+        resp = s.execute()
+        if len(resp) > 0:
+            return response_to_entity_list(resp, size=size, entity_type=ReleaseEntity)
+
+    # Without a title there is nothing left to search for.
+    if not release.title:
+        return []
+
+    body = {
+        "query": {"match": {"title": {"query": release.title, "operator": "AND"}}},
+        "size": size,
+    }
+    resp = es.search(body=body, index="fatcat_release")
+    # Test the hit list, not hits.total, whose type differs between
+    # Elasticsearch versions (number before 7.0, object since).
+    if resp["hits"]["hits"]:
+        return response_to_entity_list(resp, size=size, entity_type=ReleaseEntity)
+
+    # Get fuzzy.
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
+    body = {
+        "query": {
+            "match": {
+                "title": {
+                    "query": release.title,
+                    "operator": "AND",
+                    "fuzziness": "AUTO",
+                }
+            }
+        },
+        "size": size,
+    }
+    resp = es.search(body=body, index="fatcat_release")
+    if resp["hits"]["hits"]:
+        return response_to_entity_list(resp, size=size, entity_type=ReleaseEntity)
+
+    # TODO: perform more queries on other fields.
+    return []
+
+
+def response_to_entity_list(response, size=5, entity_type=ReleaseEntity):
+    """
+    Convert an elasticsearch result to a list of entities. Accepts both a
+    dictionary and an elasticsearch_dsl.response.Response.
+
+    We take the ids from elasticsearch and retrieve entities via API. At
+    most `size` ids are used; pass None to use all of them.
+    """
+    # TODO: retrieve_entity_list is neither defined nor imported in this
+    # module yet; it has to be provided before this function can run.
+    if isinstance(response, dict):
+        ids = [hit["_source"]["ident"] for hit in response["hits"]["hits"]][:size]
+        return retrieve_entity_list(ids, entity_type=entity_type)
+    elif isinstance(response, elasticsearch_dsl.response.Response):
+        ids = [hit.to_dict().get("ident") for hit in response][:size]
+        return retrieve_entity_list(ids, entity_type=entity_type)
+    else:
+        raise ValueError("cannot convert {}".format(response))
diff --git a/setup.py b/setup.py
index d8d2f28..1833fed 100644
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,9 @@ with open("README.md", "r") as fh:
             "pydantic",
             "toml",
             "unidecode>=0.10",
+            "fatcat-openapi-client", # https://pypi.org/project/fatcat-openapi-client/
+            "elasticsearch",
+            "elasticsearch-dsl",
         ],
         extras_require={"dev": [
             "ipython",
-- 
cgit v1.2.3