diff options
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/files/changelog_3469683.json | 1 | ||||
-rw-r--r-- | python/tests/files/example_shadow.json | 10 | ||||
-rw-r--r-- | python/tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json | 1 | ||||
-rw-r--r-- | python/tests/files/release_etodop5banbndg3faecnfm6ozi.json | 1 | ||||
-rw-r--r-- | python/tests/import_shadow.py | 61 | ||||
-rw-r--r-- | python/tests/transform_elasticsearch.py (renamed from python/tests/transform_tests.py) | 62 |
6 files changed, 132 insertions, 4 deletions
diff --git a/python/tests/files/changelog_3469683.json b/python/tests/files/changelog_3469683.json new file mode 100644 index 00000000..7a847b16 --- /dev/null +++ b/python/tests/files/changelog_3469683.json @@ -0,0 +1 @@ +{"index":3469683,"editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","timestamp":"2020-01-30T05:04:39.738601Z","editgroup":{"editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","editor_id":"scmbogxw25evtcesfcab5qaboa","editor":{"editor_id":"scmbogxw25evtcesfcab5qaboa","username":"crawl-bot","is_admin":true,"is_bot":true,"is_active":true},"changelog_index":3469683,"created":"2020-01-30T05:04:39.738601Z","description":"Files crawled from web using sandcrawler ingest tool","extra":{"agent":"fatcat_tools.IngestFileResultImporter","git_rev":"v0.3.1-280-ga889f32"},"edits":{"containers":[],"creators":[],"files":[{"edit_id":"ba819a2b-a4d0-43e6-9e5c-505284c8ae42","ident":"e3lmbzqyjjam3a5nqnccc6d654","revision":"7a606095-9d07-41ee-898a-bcf8b6bc0004","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1080/23802359.2020.1715878"}},{"edit_id":"a71c4b91-d599-4422-a7a6-527562161278","ident":"e62h2fa6fba6ve3lukv7n635fq","revision":"1374f1bd-684a-48b9-aaff-65b9e90083b5","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1186/s13071-020-3909-6"}},{"edit_id":"82e4d65d-0335-4a40-b9c9-bf38f9bd7b19","ident":"fam7ii245zasvnesikw7bhmoii","revision":"327e3358-2b2b-4919-9613-449bdbb76c55","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.23917/jjr.v9i1.8294"}},{"edit_id":"49e987dc-fc6f-4391-8b75-33176f03b5cb","ident":"fa6sljsebjapfojqapxd3dj4um","revision":"7f51f4fa-e448-410a-b7d5-7ae9cdf9fcb8","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33536/jcpe.v4i1.293"}},{"edit_id":"e62ed1ca-3961-423b-ab05-967694e32f70","ident":"fhhzbabf3zcx7p2tor2omqveyq","revision":"d40c3f8c-255f-4913-ace8-06db6af66697","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.5802/smai-jcm.59"}},{"edit_id":"337b8ed6-1248-4872-81da-d16f6db021e6","ident":"fllcoo4smfdyrh5q5lmu72e7cq","revision":"724b26cd-6ab1-46a1-bc88-a87410fdf102","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1080/1343943x.2020.1717970"}},{"edit_id":"43a0c9d5-692e-4c73-b154-d8769854d268","ident":"fzshcc6sfzegbduum2763o3lgy","revision":"a8f5e34b-fbf4-4d52-987e-887455e6bd50","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.23917/jjr.v9i1.8099"}},{"edit_id":"48767850-1141-47e3-80ef-556605e3588c","ident":"grdztt2vwjd65ovcifeo3ysbam","revision":"12dceb74-6333-4a17-b1e8-4afac9df1888","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.26565/2220-8089-2019-36-02"}},{"edit_id":"4b3b92ac-c250-4b78-9e91-43fc893c935e","ident":"hgpzsozky5amvcts45qb6nhqum","revision":"b5951fd6-dd36-422c-9770-8cdfb7e6d82d","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33549/physiolres.934410"}},{"edit_id":"dabff0df-6f52-4adb-a63f-1965f30d8bd2","ident":"hpxdh7mykng77jyfoolpixzw2y","revision":"0e06e57c-0756-4ee5-8102-0d96478aa23f","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.26452/ijrps.v11i1.1916"}},{"edit_id":"b757f2b5-5f97-45b8-bfcd-9c9fb08047b8","ident":"htorhznxdfdppbvpf57nrz536q","revision":"034f4640-fbb6-4123-9ad8-f934220ab820","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1039/c9ra09196c"}},{"edit_id":"5662469f-a4d3-4339-88a0-4d27ef3f5f58","ident":"if6a63p7rnaxbjkcr4egtihj74","revision":"08d384cf-396c-41e1-a14d-6bde24b47323","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1192/bjb.2019.92"}},{"edit_id":"540a1ec1-7879-42f4-b749-d3287eb26ef7","ident":"i76ou5g2jndo7gyjrxdhoks3bm","revision":"eddbf701-59c3-4f33-b26b-87ad29297f65","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33549/physiolres.934355"}},{"edit_id":"04a0b805-b09f-4ff6-80b3-77cb643c72d7","ident":"jl2z2az4sjfpdigdts3xjm24vy","revision":"a541e130-07be-43ae-8fbf-b0295d5b576f","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1080/23802359.2020.1719936"}},{"edit_id":"3e4566b9-7e3f-4e02-aa81-b6299545b150","ident":"jsrgx72devbadbyyiqwm2bl4aa","revision":"f51f164e-0e96-4245-973b-3408250ebc3b","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.24193/ed21.2019.17.09"}},{"edit_id":"80c2f0ca-ba43-4481-b5a5-c15d87d4d7b4","ident":"kijz6wlf25dito3av5snrz345a","revision":"fa1b5279-2221-4063-853e-070ca2d5954b","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1017/s2045796020000037"}},{"edit_id":"c0f44f12-e4c7-45d4-a79a-a3ce2d628e78","ident":"ky3cjfbzejecxaa5tjaaucurb4","revision":"0e227663-f825-42e8-bf57-ba50fea60bf5","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1017/s204579602000013x"}},{"edit_id":"92093947-7ff5-48ad-bc47-b26d4c0959c8","ident":"llfciusk6jf6rd35ofpuufmgfe","revision":"ed89f8b0-98cd-4886-b99e-b5d276b2ac9f","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33549/physiolres.934349"}},{"edit_id":"4cb0e042-4298-4d0b-98b8-3c0c808642ea","ident":"mdw33cq7svdfnlaevnpih7bsyu","revision":"4a0ed3ce-56f2-4299-861c-051e7c06499d","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.23917/jjr.v9i2.8073"}},{"edit_id":"611fc1e1-79e1-4e10-a702-0420919407f6","ident":"mwghms3u2zecdf2x5zk7tzs4mq","revision":"101d7d0f-6aca-4df3-a08a-f4a3f26d3cd4","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1186/s12864-020-6509-0"}},{"edit_id":"b9a350bd-3497-4c83-97c1-0a20a420a287","ident":"ooejwh3g35cfrmmk4bvjcqxrai","revision":"8a4e8a05-3fb4-4d83-a545-35f152e24f58","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.6018/cpd.340501"}},{"edit_id":"aeab2a58-ec4e-4a2a-82ee-a0d2428eca50","ident":"pepmo2ajfzh7ldtqdugj4p5zvy","revision":"7d13f42d-ce94-4537-8c56-0158d9bf99e9","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.6018/cpd.347981"}},{"edit_id":"f3cd1575-fcfd-4f6b-ab48-28b06592df85","ident":"qcw5h7c5uncbbphtrp3zibn5y4","revision":"fa7b2868-777e-4046-98ca-b146c0056180","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1017/s1742170519000425"}},{"edit_id":"3c0ffa45-7f39-4393-a73c-0615bf9543f6","ident":"qfmufcdlrbfyhcjemvut5om23i","revision":"fac63461-4790-49a8-9b75-b23e9a369529","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.34225/jidc.2019.14.2.57"}},{"edit_id":"4ad40f3a-55f4-4aa8-9016-d04f904b2163","ident":"qrpp45vopfbvzanalh4lmpiz24","revision":"2ac1355f-cba9-49ed-b260-9eea5ec09473","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.24193/ed21.2019.17.19"}},{"edit_id":"1ec3a67b-796c-4393-93c2-ddac4ea66e0f","ident":"q7cv6lezvjalpg5xd4tckkbmsi","revision":"702adc9f-9ecb-4030-a095-936f15839c31","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33549/physiolres.934359"}},{"edit_id":"a8ab2cf7-8aad-4060-b281-371d41df32cf","ident":"rqcd44zbfvcovipfpnqpowiq2e","revision":"bd51fd60-438a-42fe-80e9-104705b0580f","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33549/physiolres.934350"}},{"edit_id":"6d626d5c-1c7c-42e3-8d00-c313a91fc7c1","ident":"ruzbactehngbrlyx4vq3zlaqlu","revision":"5da34289-7da0-42ad-98bb-1c0b21bb6e69","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.5802/smai-jcm.55"}},{"edit_id":"4651ac6b-228c-4918-ac74-0757ce64c031","ident":"rvbhhfvwc5dkhdpxekutlew7di","revision":"53b088f6-653c-4e6d-974a-8e5e6e154f12","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.31616/asj.2019.0190"}},{"edit_id":"156b4a96-af8b-4c01-a8ec-6aa53736450f","ident":"r6j7ad7vmvg75lvel6v4e4gbb4","revision":"46647620-a36a-482b-a651-492a4e6ca1bb","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.5802/smai-jcm.53"}},{"edit_id":"67e6f76f-859d-422d-8c90-715b3173e24c","ident":"scajd3ykrjatbjmifvxwcl3yhu","revision":"d99f1da3-484a-4977-bc14-6e952d007acd","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.24193/ed21.2019.17.11"}},{"edit_id":"22395a5e-2a2e-4bf2-9547-0fb5122f8a7f","ident":"sk22wngc3fh23b74nmkbgeeyya","revision":"7423067d-3a18-4305-98b4-5cee8127e24a","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.31616/asj.2019.0300"}},{"edit_id":"4ea1f9dd-d85e-450b-bc2a-995364fcf3fe","ident":"tma3dvw77bghjfaqrdoiwquuge","revision":"c86364bf-501b-42b4-a3ed-ef334ff26e0a","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.36447/estudios2014.v33-34.art3"}},{"edit_id":"87d158ff-b0e0-4944-88d9-017dcef70d6e","ident":"tvuumx4n75dorebv7bu7imyiym","revision":"ae17166e-43ce-4d5a-95de-4befeeb76fa6","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.35236/jots.663726"}},{"edit_id":"2114c75b-ac23-471a-83d9-c76ae5f6bf42","ident":"twnql5u4mbfqffal2atv5qucoq","revision":"cb73ac6c-bc2b-422b-bc27-7032d14cac4d","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.31616/asj.2019.0162"}},{"edit_id":"2f1d20fd-e69e-4490-9f70-f2b043f53634","ident":"tzwyuimcgrepvhide2sj3lovjm","revision":"b9cf9229-db47-4fa0-a8f6-45320b5af440","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33549/physiolres.934348"}},{"edit_id":"fdb6c388-9b22-45ff-8db1-1531eac43bbf","ident":"vfwz3fuvbbgcxek2hi6vjo525q","revision":"6c57ecb2-a870-4078-af17-a779ca3ceb28","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.4197/eco.29-1.8"}},{"edit_id":"3b8cf6fa-75e7-4b15-a83c-784552ff76e9","ident":"vtq5tvfltbfv3pizvux32ek5hi","revision":"7c0b5d64-6d20-407b-9de3-482a9c75f40e","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.23917/jurisprudence.v8i2.6977"}},{"edit_id":"899303e8-ce86-4f53-909f-249ef45d9a3b","ident":"vuneewh3fncxpb7bzviptx6kze","revision":"3fd4ff1c-1855-41e2-9bae-7edbae86783a","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.3846/jcem.2020.11826"}},{"edit_id":"c0595f10-72dc-4144-b243-5b344912842f","ident":"v2i53kwtnbea7kfhsnvcpkvixe","revision":"09891662-3d67-4d7f-a4ed-7f9bfa3f426f","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.33549/physiolres.934347"}},{"edit_id":"80f26273-d243-4dcf-af4a-e05226ba679c","ident":"wtish6c32randpuucjxq5byjo4","revision":"a69cd59a-52bf-4c48-9323-62a4070b2440","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.29261/pakvetj/2019.033"}},{"edit_id":"79f1bdc1-2627-4328-97ff-71731295fcad","ident":"xkf7a4cavnhahnfsjj5w5aoopu","revision":"b826cb89-efcc-427f-a378-0829bb2b871c","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.36447/estudios2014.v33-34.art6"}},{"edit_id":"695feb37-ab8e-4ecd-8c9e-fae06ff63e39","ident":"yx24fslfafb6dgx7gvjbmoma5m","revision":"4109aeb1-fcc1-47f3-9d2c-8e3abfbc697e","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1039/c9ra10366j"}},{"edit_id":"48f2eb73-a5a3-440a-8bd1-c581797a82ca","ident":"3p5rah3wbfftdht7rabkpjfcrm","revision":"df754aec-4750-45eb-97e6-943928dad661","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.24193/ed21.2019.17.01"}},{"edit_id":"7a937ed3-5362-40ed-8f96-e6c6231e0adf","ident":"4r3madqhfzb5jb7jd7xmv55em4","revision":"6feccd54-38bd-4d51-8f42-fe211baf5ba3","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.35236/jots.668781"}},{"edit_id":"3beaa94e-c4e1-4a6b-96a7-d455ad13b7aa","ident":"53ooeweri5efjm5vhl2bwjcfze","revision":"a9d318ad-eb2a-4590-a463-8d6016e2e887","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1017/s2045796020000049"}},{"edit_id":"8a0b80e2-c8b2-4457-ac89-2fc7ad7548f7","ident":"546k37iji5bfffakw2egl2azxy","revision":"3a4d93d0-c59e-4f81-8fbb-40ce21b11b1e","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.1039/c9ra08019h"}},{"edit_id":"15fa5e34-9829-483e-bfa5-a4011b974c6b","ident":"6cy3aonbdfgxbjxnujg3hsqx7q","revision":"021fed8f-5109-4389-9e7f-13a70cbaf4a3","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.2478/joim-2019-0023"}},{"edit_id":"6bc507c8-ceea-49a5-8c1f-9c652463588e","ident":"6rxwlcytwzeopgrhidvi236b2q","revision":"60307cd5-28a3-4063-9111-d5e90e1cb346","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.3846/aviation.2019.11913"}},{"edit_id":"283013c6-4400-4fdc-b2b2-1dbd1a262332","ident":"7j4w24plxzc3nnrkorbowmnra4","revision":"0fa4112c-a596-49a2-9d36-82c213db3fb8","editgroup_id":"jebmpiqsjja7jozze7hoy3bir4","extra":{"ingest_request_source":"fatcat-changelog","link_source":"doi","link_source_id":"10.24193/ed21.2019.17.22"}}],"filesets":[],"webcaptures":[],"releases":[],"works":[]}}}
\ No newline at end of file diff --git a/python/tests/files/example_shadow.json b/python/tests/files/example_shadow.json new file mode 100644 index 00000000..3386f481 --- /dev/null +++ b/python/tests/files/example_shadow.json @@ -0,0 +1,10 @@ +{"shadow":{"shadow_corpus":"scimag","shadow_id":"12703034","sha1hex":"0000002922264275f11cca7b1c3fb662070d0dd7","doi":"10.1371/journal.pmed.0020124","pmid":null,"isbn13":null},"file_meta":{"sha1hex":"0000002922264275f11cca7b1c3fb662070d0dd7","sha256hex":"b4728210cc0f70d8a8f8c39bd97fcbbab3eaca4309ac4bdfbce5df3b66c82f79","md5hex":"debd8db178fa08a7a0aaec6e42832a8e","size_bytes":206121,"mimetype":"application/pdf"},"cdx":{"url":"https://link.springer.com/content/pdf/10.1007%2Fs11626-008-9119-8.pdf","datetime":"20180729135948","sha1hex":"0000002922264275f11cca7b1c3fb662070d0dd7","cdx_sha1hex":null,"mimetype":"application/pdf","warc_path":"UNPAYWALL-PDF-CRAWL-2018-07-20180729132538992-15980-16048-wbgrp-svc281/UNPAYWALL-PDF-CRAWL-2018-07-20180729135708800-16009-11693~wbgrp-svc281.us.archive.org~8443.warc.gz","warc_csize":32497,"warc_offset":105265425,"row_created":"2019-08-09T23:25:44.571943+00:00"}} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"51052483","sha1hex":"00000119fa780ce368ebd96563afdb3eebb90ad3","doi":"10.1191/0266355403gh289oa","pmid":null,"isbn13":null},"file_meta":{"sha1hex":"00000119fa780ce368ebd96563afdb3eebb90ad3","sha256hex":"57ce460db4410b9bfaf500ed652fd29e64d46b40c17e28f1156ba03736edf91b","md5hex":"96133eec3a6c533993213e7bdf446251","size_bytes":164344,"mimetype":"application/pdf"},"cdx":null} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"2476283","sha1hex":"0000017a31547caf347fab66282a40831b9ceb08","doi":"10.1016/0042-207x(62)90512-2","pmid":"54321","isbn13":null},"file_meta":{"sha1hex":"0000017a31547caf347fab66282a40831b9ceb08","sha256hex":"e8d0c607b024ff6ffd58a35f76c454844b70ad19fe3f78a573af1ae53f53ad9d","md5hex":"b53318522b9f35a42b7e53f150fe70b2","size_bytes":116735,"mimetype":"application/pdf"},"cdx":null} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"8760871","sha1hex":"000001abf3dbf936d5053d14f41699722531b8c6","doi":"10.1016/s0042-207x(79)80945-8","pmid":null,"isbn13":null},"file_meta":{"sha1hex":"000001abf3dbf936d5053d14f41699722531b8c6","sha256hex":"8a69b4a6dff98682ad43e7d4139221c1557c1bd202b615490af8a2c7dcbb71d2","md5hex":"29e1cfac8ecfbc8be57a1ec8b465c4be","size_bytes":138218,"mimetype":"application/pdf"},"cdx":null} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"11473618","sha1hex":"0000022e387be46ef797f6686d36c9899cbd6856","doi":"10.1038/ng.2339","pmid":null,"isbn13":null},"file_meta":{"sha1hex":"0000022e387be46ef797f6686d36c9899cbd6856","sha256hex":"a72517e8e72d78bc07a6ef7ff3a6d1d3e04325df986cb8f1bbb4e809f7a9dbdd","md5hex":"9cb8a6e056c9cc740d3bed0c50cd53dc","size_bytes":80992,"mimetype":"application/pdf"},"cdx":null} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"47301218","sha1hex":"0000029209536bda5f22e5110e573c5bd8ceb43a","doi":"10.2307/23406551","pmid":null,"isbn13":null},"file_meta":{"sha1hex":"0000029209536bda5f22e5110e573c5bd8ceb43a","sha256hex":"315f1d39a00ccf256fa15d92a14869dbda48d31500989aaacb11368f906a5827","md5hex":"8141b42ec3bb41fa87099633a1b61d93","size_bytes":305236,"mimetype":"application/pdf"},"cdx":null} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"30603850","sha1hex":"000002c1abd521f18aa23d9e8f464e697e218ab1","doi":"10.1109/spire.1998.712983","pmid":null,"isbn13":null},"file_meta":{"sha1hex":"000002c1abd521f18aa23d9e8f464e697e218ab1","sha256hex":"777e2c472e9d2fec3bbd26bad788562cf1e08e5850315c25cfb6e46d38e7e4af","md5hex":"3a3c92fabaf6cf437bb596d9e9255ff6","size_bytes":113768,"mimetype":"application/pdf"},"cdx":{"url":"http://proteomics.bioprojects.org/pavel/papers/SST_versus_EST_in_gene_recognition..pdf","datetime":"20081121222143","sha1hex":"000002c1abd521f18aa23d9e8f464e697e218ab1","cdx_sha1hex":null,"mimetype":"application/pdf","warc_path":"1227992340180_31-c/1227992509265_9.arc.gz","warc_csize":61212,"warc_offset":62956683,"row_created":"2020-01-07T02:06:33.965383+00:00"}} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"9311918","sha1hex":"000002d4f7d4174451e4214475d5ba59f1f6a593","doi":"10.1111/j.1439-0507.2008.01572.x","pmid":"18721331","isbn13":null},"file_meta":{"sha1hex":"000002d4f7d4174451e4214475d5ba59f1f6a593","sha256hex":"713758ce0417f604c0a4b0bf5b5eea571a9b08ca4cc81a98d602c43f42abfe37","md5hex":"0df123e6305c617ffd38ebef90b1e318","size_bytes":178664,"mimetype":"application/pdf"},"cdx":null} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"7757772","sha1hex":"000002f8966a4c5547f8a47f43661fcc3edc34ea","doi":"10.1007/s10464-011-9424-3","pmid":"21287262","isbn13":null},"file_meta":{"sha1hex":"000002f8966a4c5547f8a47f43661fcc3edc34ea","sha256hex":"ee1bce27134ae55b3d67f9b31f66571e41ac496fc3fb526dec2d53513b8f6deb","md5hex":"e72c5cf3d61635821e78ca0306c98887","size_bytes":337857,"mimetype":"application/pdf"},"cdx":null} +{"shadow":{"shadow_corpus":"scimag","shadow_id":"74272862","sha1hex":"000003a94022be58305ccc2a018a6359eeb226db","doi":"10.1002/slct.201802783","pmid":null,"isbn13":null},"file_meta":{"sha1hex":"000003a94022be58305ccc2a018a6359eeb226db","sha256hex":"f277eefc7b1466df814a7a892ab8e2e7f08db1faae0bf73b893211e5f5b37193","md5hex":"27534b8494f54ba5de47c16fb2590b04","size_bytes":1372272,"mimetype":"application/pdf"},"cdx":null} diff --git a/python/tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json b/python/tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json new file mode 100644 index 00000000..bed8977d --- /dev/null +++ b/python/tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json @@ -0,0 +1 @@ +{"release_ids":["5tbuas2e4vd6jaowbgzmmhhqxe"],"mimetype":"application/pdf","urls":[{"url":"https://web.archive.org/web/20200130042753/https://www.zhros.ru/jour/article/download/811/542","rel":"webarchive"},{"url":"https://www.zhros.ru/jour/article/download/811/542","rel":"web"}],"sha256":"1665cdb90b73c684233038601c52995acef77bb37aefc6e63ae13e4194d48261","sha1":"3ad4df99ff1354ec0b5a333a59fba9a3a5d9812a","md5":"39159f9c8e98a245f954c9000b0f2810","size":739980,"revision":"dcc7a975-725d-4bc9-8c3f-cd0476cd485e","ident":"bcah4zp5tvdhjl5bqci2c2lgfa","state":"active"}
\ No newline at end of file diff --git a/python/tests/files/release_etodop5banbndg3faecnfm6ozi.json b/python/tests/files/release_etodop5banbndg3faecnfm6ozi.json new file mode 100644 index 00000000..1204c95d --- /dev/null +++ b/python/tests/files/release_etodop5banbndg3faecnfm6ozi.json @@ -0,0 +1 @@ +{"abstracts":[],"refs":[{"index":0,"extra":{"issue":"Suppl 1","volume":"118"},"key":"10.1111/j.1471-0528.2011.03098.x-BIB1|cit1","year":2011,"container_name":"BJOG","title":"Saving Mothers' Lives: reviewing maternal deaths to make motherhood safer-2006-2008. The Eighth Report of the Confidential Enquiries into Maternal Deaths in the United Kingdom"}],"contribs":[{"index":0,"raw_name":"Philip Steer","role":"author","extra":{"seq":"first"}}],"language":"en","publisher":"Wiley","pages":"1404-1404","issue":"11","volume":"118","ext_ids":{"doi":"10.1111/j.1471-0528.2011.03098.x"},"release_year":2011,"release_date":"2011-09-09","release_stage":"published","release_type":"article-journal","container_id":"hl5g6d5msjcl7hlbyyvcsbhc2u","webcaptures":[],"filesets":[],"files":[],"container":{"wikidata_qid":"Q15724571","issnl":"1470-0328","publisher":"Wiley (Blackwell Publishing)","container_type":"journal","name":"BJOG: an International Journal of Obstetrics and Gynaecology","extra":{"abbrev":"BJOG","country":"gb","ia":{"sim":{"year_spans":[[1902,1915],[1921,2015]]}},"issne":"1471-0528","issnp":"1470-0328","kbart":{"clockss":{"year_spans":[[1989,1989],[1993,1993],[2002,2003],[2009,2017]]},"portico":{"year_spans":[[1902,2019]]}},"languages":["en"],"sherpa_romeo":{"color":"yellow"},"urls":["http://www.bjog.org/view/0/index.html"]},"revision":"ec26766c-c1fe-453b-837d-087cc254fe07","ident":"hl5g6d5msjcl7hlbyyvcsbhc2u","state":"active"},"work_id":"wmwe5wwkzfcs7gyjfgdeanksha","title":"Saving Mothers' Lives. Reviewing maternal deaths to make motherhood safer: 2006-2008","state":"active","ident":"etodop5banbndg3faecnfm6ozi","revision":"deb7e050-6df6-42ed-9704-788a0e30facf","extra":{"crossref":{"type":"journal-article"},"subtitle":["Correpondence"]}}
\ No newline at end of file diff --git a/python/tests/import_shadow.py b/python/tests/import_shadow.py new file mode 100644 index 00000000..70a918d2 --- /dev/null +++ b/python/tests/import_shadow.py @@ -0,0 +1,61 @@ + +import json +import pytest +from fatcat_tools.importers import ShadowLibraryImporter, JsonLinePusher +from fixtures import api + + +@pytest.fixture(scope="function") +def shadow_importer(api): + yield ShadowLibraryImporter(api) + +# TODO: use API to check that entities actually created... +def test_shadow_importer_basic(shadow_importer): + with open('tests/files/example_shadow.json', 'r') as f: + JsonLinePusher(shadow_importer, f).run() + +def test_shadow_importer(shadow_importer): + last_index = shadow_importer.api.get_changelog(limit=1)[0].index + with open('tests/files/example_shadow.json', 'r') as f: + shadow_importer.bezerk_mode = True + counts = JsonLinePusher(shadow_importer, f).run() + assert counts['insert'] == 2 + assert counts['exists'] == 0 + assert counts['skip'] == 8 + + # fetch most recent editgroup + change = shadow_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup + assert eg.description + assert "shadow library" in eg.description.lower() + assert eg.extra['git_rev'] + assert "fatcat_tools.ShadowLibraryImporter" in eg.extra['agent'] + + # re-insert; should skip + with open('tests/files/example_shadow.json', 'r') as f: + shadow_importer.reset() + shadow_importer.bezerk_mode = False + counts = JsonLinePusher(shadow_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 2 + assert counts['skip'] == 8 + +def test_shadow_dict_parse(shadow_importer): + with open('tests/files/example_shadow.json', 'r') as f: + raw = json.loads(f.readline()) + f = shadow_importer.parse_record(raw) + + assert f.sha1 == "0000002922264275f11cca7b1c3fb662070d0dd7" + assert f.md5 == "debd8db178fa08a7a0aaec6e42832a8e" + assert f.sha256 == "b4728210cc0f70d8a8f8c39bd97fcbbab3eaca4309ac4bdfbce5df3b66c82f79" + assert f.mimetype == "application/pdf" + assert f.size == 206121 + assert len(f.urls) == 2 + for u in f.urls: + if u.rel == "publisher": + assert u.url.startswith("https://link.springer.com/content/pdf/10.1007%2Fs11626-008-9119-8.pdf") + if u.rel == "webarchive": + assert u.url.startswith("https://web.archive.org/") + assert "20180729135948" in u.url + assert len(f.release_ids) == 1 + diff --git a/python/tests/transform_tests.py b/python/tests/transform_elasticsearch.py index f254e117..a954fc4d 100644 --- a/python/tests/transform_tests.py +++ b/python/tests/transform_elasticsearch.py @@ -7,6 +7,7 @@ from fixtures import api from import_journal_metadata import journal_metadata_importer from import_crossref import crossref_importer +from import_matched import matched_importer def test_basic_elasticsearch_convert(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: @@ -72,14 +73,67 @@ def test_rich_elasticsearch_convert(): assert es['ref_count'] == 2 assert es['ref_linked_count'] == 1 -def test_elasticsearch_from_json(): - r = entity_from_json(open('./tests/files/math_universe.json', 'r').read(), ReleaseEntity) - release_to_elasticsearch(r) +def test_elasticsearch_release_from_json(): + r = entity_from_json(open('./tests/files/release_etodop5banbndg3faecnfm6ozi.json', 'r').read(), ReleaseEntity) + es = release_to_elasticsearch(r) + + assert es['subtitle'] == "Correpondence" + assert es['ident'] == "etodop5banbndg3faecnfm6ozi" + assert es['container_name'] == "BJOG: an International Journal of Obstetrics and Gynaecology" + assert es['first_page'] == "1404" + assert es['issue'] == "11" + assert es['volume'] == "118" + assert es['number'] == None + assert es['in_ia_sim'] == True + assert es['in_kbart'] == True -def test_elasticsearch_container_convert(journal_metadata_importer): +def test_elasticsearch_container_transform(journal_metadata_importer): with open('tests/files/journal_metadata.sample.json', 'r') as f: raw = json.loads(f.readline()) c = journal_metadata_importer.parse_record(raw) c.state = 'active' es = container_to_elasticsearch(c) assert es['publisher'] == c.publisher + +def test_elasticsearch_file_transform(matched_importer): + f = entity_from_json(open('./tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json', 'r').read(), FileEntity) + + f.state = 'active' + es = file_to_elasticsearch(f) + assert es['sha1'] == f.sha1 + assert es['sha256'] == f.sha256 + assert es['md5'] == f.md5 + assert es['size_bytes'] == f.size + assert es['mimetype'] == f.mimetype + assert es['in_ia'] == True + + assert 'web' in es['rels'] + assert 'www.zhros.ru' in es['hosts'] + assert 'zhros.ru' in es['domains'] + assert 'archive.org' in (es['hosts'] + es['domains']) + assert 'web.archive.org' in (es['hosts'] + es['domains']) + # old regression + assert not '.archive.org' in (es['hosts'] + es['domains']) + +def test_elasticsearch_changelog_transform(matched_importer): + ce = entity_from_json(open('./tests/files/changelog_3469683.json', 'r').read(), ChangelogEntry) + + es = changelog_to_elasticsearch(ce) + assert es['index'] == 3469683 + # len("2020-01-30T05:04:39") => 19 + assert es['timestamp'][:19] == "2020-01-30T05:04:39.738601Z"[:19] + assert es['editor_id'] == "scmbogxw25evtcesfcab5qaboa" + assert es['username'] == "crawl-bot" + assert es['is_bot'] == True + assert es['is_admin'] == True + assert es['agent'] == "fatcat_tools.IngestFileResultImporter" + + assert es['total'] == 50 + assert es['files'] == 50 + assert es['new_files'] == 50 + assert es['created'] == 50 + + assert es['releases'] == 0 + assert es['new_releases'] == 0 + assert es['updated'] == 0 + assert es['deleted'] == 0 |