aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2022-03-23 18:55:21 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2022-03-23 18:55:21 -0700
commit 134cb050988be2c545af89e0a67c4998307bb819 (patch)
tree 27b7dc3b1fb455c8cf9af98c6bb5bcf618a2f16f /python
parent 929f6d1020362a8065d1e0c95d2ee67c88f89b33 (diff)
download fatcat-134cb050988be2c545af89e0a67c4998307bb819.tar.gz
fatcat-134cb050988be2c545af89e0a67c4998307bb819.zip
single-file variant of fileset importer for dataset attempts
Diffstat (limited to 'python')
-rwxr-xr-x python/fatcat_import.py 58
-rw-r--r-- python/fatcat_tools/importers/__init__.py 1
-rw-r--r-- python/fatcat_tools/importers/ingest.py 201
-rw-r--r-- python/tests/files/example_fileset_file_ingest_result.json 20
-rw-r--r-- python/tests/import_ingest.py 60
5 files changed, 340 insertions, 0 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index f502d4ed..2896577e 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -25,6 +25,7 @@ from fatcat_tools.importers import (
FilesetImporter,
GrobidMetadataImporter,
IngestFileResultImporter,
+ IngestFilesetFileResultImporter,
IngestFilesetResultImporter,
IngestWebResultImporter,
JalcImporter,
@@ -232,6 +233,30 @@ def run_ingest_fileset(args: argparse.Namespace) -> None:
JsonLinePusher(ifri, args.json_file).run()
def run_ingest_fileset_file(args: argparse.Namespace) -> None:
    """
    CLI handler for importing single-file dataset ingest results.

    Builds an IngestFilesetFileResultImporter from the parsed command-line
    arguments, then drives it with a KafkaJsonPusher when ``args.kafka_mode``
    is set, or with a JsonLinePusher reading ``args.json_file`` otherwise.
    """
    importer = IngestFilesetFileResultImporter(
        args.api,
        editgroup_description=args.editgroup_description_override,
        skip_source_allowlist=args.skip_source_allowlist,
        do_updates=args.do_updates,
        default_link_rel=args.default_link_rel,
        edit_batch_size=args.batch_size,
    )

    # file mode: push rows straight from the supplied file handle
    if not args.kafka_mode:
        JsonLinePusher(importer, args.json_file).run()
        return

    # kafka mode: consume the sandcrawler fileset ingest results
    kafka_pusher = KafkaJsonPusher(
        importer,
        args.kafka_hosts,
        args.kafka_env,
        "ingest-fileset-results",
        "fatcat-{}-ingest-fileset-result".format(args.kafka_env),
        kafka_namespace="sandcrawler",
        consume_batch_size=args.batch_size,
        force_flush=True,
    )
    kafka_pusher.run()
+
+
def run_savepapernow_file(args: argparse.Namespace) -> None:
ifri = SavePaperNowFileImporter(
args.api,
@@ -750,6 +775,39 @@ def main() -> None:
help="default URL rel for matches (eg, 'publisher', 'web')",
)
+ sub_ingest_fileset_file = subparsers.add_parser(
+ "ingest-fileset-file-results",
+ help="add/update file entities linked to releases based on sandcrawler dataset/fileset ingest results",
+ )
+ sub_ingest_fileset_file.set_defaults(
+ func=run_ingest_fileset_file,
+ auth_var="FATCAT_AUTH_WORKER_CRAWL",
+ )
+ sub_ingest_fileset_file.add_argument(
+ "json_file",
+ help="ingest_fileset JSON file to import from",
+ default=sys.stdin,
+ type=argparse.FileType("r"),
+ )
+ sub_ingest_fileset_file.add_argument(
+ "--skip-source-allowlist",
+ action="store_true",
+ help="don't filter import based on request source allowlist",
+ )
+ sub_ingest_fileset_file.add_argument(
+ "--kafka-mode", action="store_true", help="consume from kafka topic (not stdin)"
+ )
+ sub_ingest_fileset_file.add_argument(
+ "--do-updates",
+ action="store_true",
+ help="update pre-existing fileset entities if new match (instead of skipping)",
+ )
+ sub_ingest_fileset_file.add_argument(
+ "--default-link-rel",
+ default="fileset",
+ help="default URL rel for matches (eg, 'publisher', 'web')",
+ )
+
sub_savepapernow_file = subparsers.add_parser(
"savepapernow-file-results",
help="add file entities crawled due to async Save Paper Now request",
diff --git a/python/fatcat_tools/importers/__init__.py b/python/fatcat_tools/importers/__init__.py
index 654be2e9..e13ab552 100644
--- a/python/fatcat_tools/importers/__init__.py
+++ b/python/fatcat_tools/importers/__init__.py
@@ -37,6 +37,7 @@ from .fileset_generic import FilesetImporter
from .grobid_metadata import GrobidMetadataImporter
from .ingest import (
IngestFileResultImporter,
+ IngestFilesetFileResultImporter,
IngestFilesetResultImporter,
IngestWebResultImporter,
SavePaperNowFileImporter,
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index c8d04d6f..653b4b0a 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -855,6 +855,207 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
)
class IngestFilesetFileResultImporter(IngestFileResultImporter):
    """
    Variant of IngestFileResultImporter for processing dataset (Fileset) ingest
    results, which resulted in a single file, into File entities.

    Rows are only accepted when they have status "success-file", ingest type
    "dataset", and a manifest containing exactly one file. Every rejected row
    increments a "skip-*" key in ``self.counts``.
    """

    def __init__(self, api: ApiClient, **kwargs) -> None:
        """
        Set editgroup defaults for this importer and initialize the parent.

        ``editgroup_description`` and ``editgroup_extra`` may be passed as
        keyword arguments; remaining keyword arguments are forwarded to the
        parent constructor. ``do_updates`` is always forced to False.
        """
        eg_desc = (
            kwargs.pop("editgroup_description", None)
            or "Single files crawled from web using sandcrawler ingest tool, in dataset mode"
        )
        # copy, so that a dict supplied by the caller is never mutated
        eg_extra = dict(kwargs.pop("editgroup_extra", None) or {})
        eg_extra.setdefault("agent", "fatcat_tools.IngestFilesetFileResultImporter")
        # this importer never updates existing entities, whatever the caller asked for
        kwargs["do_updates"] = False
        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
        # NOTE(review): not read anywhere in this class; presumably kept for
        # parity with the fileset importer -- confirm before removing
        self.max_file_count = 300

    def want_fileset(self, row: Dict[str, Any]) -> bool:
        """
        Return True only if the row's manifest holds exactly one file.

        Missing/empty manifests and multi-file manifests are counted and
        rejected.
        """
        manifest: Optional[List[Any]] = row.get("manifest")
        if not manifest:
            self.counts["skip-empty-manifest"] += 1
            return False

        if len(manifest) > 1:
            self.counts["skip-multiple-files"] += 1
            return False

        return True

    def want(self, row: Dict[str, Any]) -> bool:
        """
        Decide whether an ingest result row should be imported.

        Applies the generic ``want_ingest()`` filter, then requires status
        "success-file", ingest type "dataset", and a single-file manifest.
        """
        if not self.want_ingest(row):
            return False

        if row.get("status") != "success-file":
            self.counts["skip-status"] += 1
            return False

        # fileset-specific filters
        if row["request"].get("ingest_type") not in [
            "dataset",
        ]:
            self.counts["skip-ingest-type"] += 1
            return False

        if not self.want_fileset(row):
            return False

        return True

    def parse_fileset_urls(self, row: Dict[str, Any]) -> List[FilesetUrl]:
        """
        Build fileset-level URLs from an ingest result row.

        Returns an empty list when the row has no ``ingest_strategy``. A URL is
        only emitted when every row field interpolated into it is present, so
        incomplete rows yield fewer URLs instead of raising KeyError.
        """
        if not row.get("ingest_strategy"):
            return []
        strategy = row["ingest_strategy"]
        urls = []
        # XXX
        if strategy == "archiveorg-fileset" and row.get("archiveorg_item_name"):
            urls.append(
                fatcat_openapi_client.FilesetUrl(
                    url=f"https://archive.org/download/{row['archiveorg_item_name']}/",
                    rel="archive-base",
                )
            )
        if (
            strategy.startswith("web-")
            and row.get("platform_base_url")
            and row.get("web_base_url")
            and row.get("web_base_url_dt")
        ):
            urls.append(
                fatcat_openapi_client.FilesetUrl(
                    url=f"https://web.archive.org/web/{row['web_base_url_dt']}/{row['web_base_url']}",
                    rel="webarchive-base",
                )
            )
        if (
            strategy == "archiveorg-fileset-bundle"
            and row.get("archiveorg_item_name")
            and row.get("archiveorg_bundle_path")
        ):
            urls.append(
                fatcat_openapi_client.FilesetUrl(
                    url=f"https://archive.org/download/{row['archiveorg_item_name']}/{row['archiveorg_bundle_path']}",
                    rel="archive-bundle",
                )
            )

        if (
            strategy == "web-fileset-bundle"
            and row.get("platform_bundle_url")
            and row.get("web_bundle_url")
            and row.get("web_bundle_url_dt")
        ):
            urls.append(
                fatcat_openapi_client.FilesetUrl(
                    url=f"https://web.archive.org/web/{row['web_bundle_url_dt']}/{row['web_bundle_url']}",
                    rel="webarchive-bundle",
                )
            )

        # add any additional / platform URLs here
        if row.get("platform_bundle_url"):
            urls.append(
                fatcat_openapi_client.FilesetUrl(
                    url=row["platform_bundle_url"],
                    rel="repository-bundle",
                )
            )
        if row.get("platform_base_url"):
            urls.append(
                fatcat_openapi_client.FilesetUrl(
                    # the base rel must point at the base URL, not the bundle URL
                    url=row["platform_base_url"],
                    rel="repository-base",
                )
            )
        elif row.get("terminal"):
            # fallback generic web URL
            terminal_url = row["terminal"].get("terminal_url")
            if terminal_url:
                urls.append(
                    fatcat_openapi_client.FilesetUrl(
                        url=terminal_url,
                        rel="web",
                    )
                )

        return urls

    def parse_record(self, row: Dict[str, Any]) -> Optional[FileEntity]:
        """
        Convert a single-file dataset ingest result into a FileEntity.

        Returns None (after incrementing a "skip-*" counter) when the row is
        not a dataset ingest, no release can be identified, the manifest is not
        exactly one successful file, no access URL can be built, the hashes are
        known-bad, or required file metadata is missing.
        """
        request = row["request"]

        # double check that want() filtered request correctly
        if request.get("ingest_type") not in [
            "dataset",
        ]:
            self.counts["skip-ingest-type"] += 1
            return None

        # identify release by fatcat ident, or extid lookup
        release_ident = self.parse_ingest_release_ident(row)

        if not release_ident:
            self.counts["skip-release-not-found"] += 1
            return None

        # edit-level context; built once and attached to the entity at the end
        edit_extra = self.parse_edit_extra(row)
        if row.get("ingest_strategy"):
            edit_extra["ingest_strategy"] = row["ingest_strategy"]
        if row.get("platform"):
            edit_extra["platform"] = row["platform"]
        if row.get("platform_id"):
            edit_extra["platform_id"] = row["platform_id"]

        # explicit checks instead of assert: input is external, and asserts
        # are stripped when running with -O
        manifest = row.get("manifest") or []
        if row.get("file_count") != 1 or len(manifest) != 1:
            self.counts["skip-manifest-mismatch"] += 1
            return None
        file_meta = manifest[0]
        if file_meta.get("status") != "success":
            self.counts["skip-file-status"] += 1
            return None

        file_path = file_meta.get("path")

        # add file-level access URLs
        entity_urls = []
        if file_meta.get("platform_url"):
            entity_urls.append(FileUrl(rel="web", url=file_meta["platform_url"]))
        if file_meta.get("terminal_url") and file_meta.get("terminal_dt"):
            entity_urls.append(
                FileUrl(
                    rel="webarchive",
                    url=f"https://web.archive.org/web/{file_meta['terminal_dt']}/{file_meta['terminal_url']}",
                )
            )
        if (
            row.get("ingest_strategy") == "archiveorg-file"
            and row.get("archiveorg_item_name")
            and file_path
        ):
            entity_urls.append(
                FileUrl(
                    rel="archive",
                    url=f"https://archive.org/download/{row['archiveorg_item_name']}/{file_path}",
                )
            )

        if not entity_urls:
            self.counts["skip-no-access-url"] += 1
            return None

        entity_extra: Dict[str, Any] = dict()
        if file_path is not None:
            entity_extra["path"] = file_path

        md5 = file_meta.get("md5")
        sha1 = file_meta.get("sha1")
        sha256 = file_meta.get("sha256")
        size = file_meta.get("size")
        mimetype = file_meta.get("mimetype")

        # this is to work around a bug in old sandcrawler ingest code
        if md5 is not None and md5 == sha1:
            self.counts["skip-bad-hashes"] += 1
            return None

        if not (md5 and sha1 and sha256 and (size is not None) and mimetype):
            self.counts["skip-partial-file-info"] += 1
            return None

        fe = FileEntity(
            md5=md5,
            sha1=sha1,
            sha256=sha256,
            size=size,
            mimetype=mimetype,
            release_ids=[release_ident],
            urls=entity_urls,
            extra=entity_extra or None,
        )
        if edit_extra:
            fe.edit_extra = edit_extra
        return fe
+
+
class SavePaperNowFilesetImporter(IngestFilesetResultImporter):
"""
Like SavePaperNowFileImporter, but for fileset/dataset ingest.
diff --git a/python/tests/files/example_fileset_file_ingest_result.json b/python/tests/files/example_fileset_file_ingest_result.json
new file mode 100644
index 00000000..94ae4cc6
--- /dev/null
+++ b/python/tests/files/example_fileset_file_ingest_result.json
@@ -0,0 +1,20 @@
+{"archiveorg_item_name":"springernature.figshare.com-7767695-v1","cdx":{"datetime":"20200223225704","mimetype":"text/html","sha1b32":"37T7GPFQ27ZL6X56G47QKDBZNO5GKTYC","sha1hex":"dfe7f33cb0d7f2bf5fbe373f050c396bba654f02","status_code":200,"surt":"com,figshare,springernature)/articles/landsat_ndvi_difference_1990_2018_tile_6/7767695/1","url":"https://springernature.figshare.com/articles/Landsat_NDVI_difference_1990_2018_Tile_6/7767695/1","warc_csize":6423,"warc_offset":6370648,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200223215122016-26231-26295-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200223225636857-26294-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"0e3019c06b0bc9144d2d9cde84d14ffa","mimetype":"text/html","sha1hex":"dfe7f33cb0d7f2bf5fbe373f050c396bba654f02","sha256hex":"093d7c222bb294b95a0cd55b6285e29d78415460c7b64bb100bcf5e2bcdeb24c","size_bytes":26422},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7767695.v1","https://springernature.figshare.com/articles/Landsat_NDVI_difference_1990_2018_Tile_6/7767695/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"dfc41b617564f99a12e6077a6208876f","mimetype":"image/tiff","path":"NDVI_Diff_1990_2018_T06.tif","platform_url":"https://ndownloader.figshare.com/files/14460875","sha1":"6fb020064da66bb7a666c17555611cf6820fc9ae","sha256":"2febad53ff0f163a18d7cbb913275bf99ed2544730cda191458837e2b0da9d18","size":410631015,"status":"success"}],"platform_base_url":null,"platform_domain":"springernature.figshare.com","platform_id":"7767695","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7767695.v1","ext_ids":{"doi":"10.6084/m9.figshare.7767695.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"bngz3appb5g5vkb4gxjzitmqzu"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7767695.v1","release_stage":"published"},"status":"success
-file","terminal":{"terminal_dt":"20200223225704","terminal_sha1hex":"dfe7f33cb0d7f2bf5fbe373f050c396bba654f02","terminal_status_code":200,"terminal_url":"https://springernature.figshare.com/articles/Landsat_NDVI_difference_1990_2018_Tile_6/7767695/1"},"total_size":410631015}
+{"archiveorg_item_name":"figshare.com-3807984-v1","cdx":{"datetime":"20200213033157","mimetype":"text/html","sha1b32":"X6CTKDY5QDKG376RGYHQ6KUJRL6MJJS6","sha1hex":"bf85350f1d80d46dffd1360f0f2a898afcc4a65e","status_code":200,"surt":"com,figshare)/articles/20150722_qe5_uplc8_rjc_collab_db_4067_01_raw/3807984/1","url":"https://figshare.com/articles/20150722_QE5_UPLC8_RJC_COLLAB_DB_4067_01_raw/3807984/1","warc_csize":6114,"warc_offset":45362828,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200213031757373-10606-10668-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200213033034289-10620-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"4128902a230cd174d43b9fa41d729510","mimetype":"text/html","sha1hex":"bf85350f1d80d46dffd1360f0f2a898afcc4a65e","sha256hex":"fbcf66355bfe726cbf087989459af5e7052c6f0d65e566542ac914e2665f75b6","size_bytes":23637},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.3807984.v1","https://figshare.com/articles/20150722_QE5_UPLC8_RJC_COLLAB_DB_4067_01_raw/3807984/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"03a540a431cd6adf5fd214ba9cdb9a08","mimetype":"application/octet-stream","path":"20150722_QE5_UPLC8_RJC_COLLAB_DB_4067_01.raw","platform_url":"https://ndownloader.figshare.com/files/5930763","sha1":"8454fe535ac60b5f02d71a78f332b98b507de6d2","sha256":"b8967de9381a4f8c8b361b21b09605602b9250799d00f5d43f5be97612f01e18","size":1827479779,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"3807984","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.3807984.v1","ext_ids":{"doi":"10.6084/m9.figshare.3807984.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"b63ugovrtvestd32k3d6kzo5iq"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.3807984.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt
":"20200213033157","terminal_sha1hex":"bf85350f1d80d46dffd1360f0f2a898afcc4a65e","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/20150722_QE5_UPLC8_RJC_COLLAB_DB_4067_01_raw/3807984/1"},"total_size":1827479779}
+{"archiveorg_item_name":"figshare.com-873634-v5","cdx":{"datetime":"20200215191649","mimetype":"text/html","sha1b32":"5W4NNJWLAPAJKLB7WTZTAU2RR7IO3MW2","sha1hex":"edb8d6a6cb03c0952c3fb4f33053518fd0edb2da","status_code":200,"surt":"com,figshare)/articles/_research_proposal_mutation_inheritance_the_long_term_effects_of_naturally_occurring_radiation_on_living_systems/873634/5","url":"https://figshare.com/articles/_Research_Proposal_Mutation_Inheritance_The_Long_Term_Effects_of_Naturally_Occurring_Radiation_on_Living_Systems/873634/5","warc_csize":6944,"warc_offset":25128858,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200215190429339-14296-14362-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200215191529091-14307-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"4d334dac22835b23f7fcd4aa6802b393","mimetype":"text/html","sha1hex":"edb8d6a6cb03c0952c3fb4f33053518fd0edb2da","sha256hex":"45a0fd7db2c88199bbe666a418c383036e72c90f2481b694e3ce766fa487e83f","size_bytes":34155},"fileset_file":{"cdx":{"datetime":"20200212204410","mimetype":"binary/octet-stream","sha1b32":"4I4HQS45AZEKSHDPJPAQSNTQG5GA2EP6","sha1hex":"e238784b9d0648a91c6f4bc1093670374c0d11fe","status_code":200,"surt":"com,amazonaws,s3-eu-west-1)/pfigshare-u-files/1798358/mutationinheritance.pdf","url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/1798358/MutationInheritance.pdf","warc_csize":139934,"warc_offset":17465253,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200212203938614-09941-10005-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200212204347568-09946-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_meta":[{"md5hex":"57f8cb4fe96e7c55caefcb40e2d5e74f","mimetype":"application/pdf","sha1hex":"e238784b9d0648a91c6f4bc1093670374c0d11fe","sha256hex":"7e403da8055c05bef83051956f523f7e98763732bc93904743f6cdf1865de04d","size_bytes":149128}],"terminal":{"terminal_dt":"20200212204410","terminal_status_code":200,"terminal_url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/1798358
/MutationInheritance.pdf"}},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.873634.v5","https://figshare.com/articles/_Research_Proposal_Mutation_Inheritance_The_Long_Term_Effects_of_Naturally_Occurring_Radiation_on_Living_Systems/873634/5"],"html_biblio":{"abstract":"This research proposal seeks to investigate the possibility that naturally occurring radiation (background radiation) could alter species composition (genotype → phenotype) over extended timeframes. We look to establish a parallel between changed levels of natural radiation and significant evolutionary events. In particular, we will draw our attention to the establishment of a modern atmosphere and the changes in radiation levels that came thereafter. Our study will focus less on early microbial life, and more on modern vertebrates/invertebrates and the subsequent colonization of land species that took place after the ozone layer was established. By implementing a reliable framework in which to test and evaluate genetic variance over successive generations, we will demonstrate that naturally occurring radiation is more influential in creating genetic variance than previously thought. 
In doing so, we hope to create a standard model for determining expectancies in population genetics based on exposure to multiple levels of radiation.","container_name":"figshare","contrib_names":["Rodriguez, Tommy"],"doi":"10.6084/m9.figshare.873634.v5","pdf_fulltext_url":"https://figshare.com/articles/_Research_Proposal_Mutation_Inheritance_The_Long_Term_Effects_of_Naturally_Occurring_Radiation_on_Living_Systems/873634/files/1798358.pdf","publisher":"figshare","release_date":"2014-11-18","title":"[Research Proposal] Mutation & Inheritance: The Long Term Effects of Naturally Occurring Radiation on Living Systems"},"ingest_strategy":"web-file","manifest":[{"md5":"57f8cb4fe96e7c55caefcb40e2d5e74f","mimetype":"application/pdf","path":"Mutation&Inheritance.pdf","platform_url":"https://ndownloader.figshare.com/files/1798358","sha1":"57f8cb4fe96e7c55caefcb40e2d5e74f","sha256":"7e403da8055c05bef83051956f523f7e98763732bc93904743f6cdf1865de04d","size":149128,"status":"success","terminal_dt":"20200212204410","terminal_url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/1798358/MutationInheritance.pdf"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"873634","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.873634.v5","ext_ids":{"doi":"10.6084/m9.figshare.873634.v5"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"b62yygefaneexcklxfti2lmwoi"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.873634.v5","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200215191649","terminal_sha1hex":"edb8d6a6cb03c0952c3fb4f33053518fd0edb2da","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/_Research_Proposal_Mutation_Inheritance_The_Long_Term_Effects_of_Naturally_Occurring_Radiation_on_Living_Systems/873634/5"},"total_size":149128}
+{"archiveorg_item_name":"figshare.com-4609288-v1","cdx":{"datetime":"20200210051725","mimetype":"text/html","sha1b32":"5NYNWPBAWI3BSBXJTJ63YJPBSW6KMXR5","sha1hex":"eb70db3c20b2361906e99a7dbc25e195bca65e3d","status_code":200,"surt":"com,figshare)/articles/digestion_7_from_exp_it13_leica_register/4609288/1","url":"https://figshare.com/articles/DIgestion_7_from_EXP_IT13_LEICA_register/4609288/1","warc_csize":6029,"warc_offset":37698022,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200210050626489-05335-05397-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200210051705009-05349-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"1e6889267abd940e3ce2ef71bc7d4f4f","mimetype":"text/html","sha1hex":"eb70db3c20b2361906e99a7dbc25e195bca65e3d","sha256hex":"f74bd9c54030b717e0ca79b2670410ef771323a487fdd887cf41f5db4f3c7b41","size_bytes":22742},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.4609288.v1","https://figshare.com/articles/DIgestion_7_from_EXP_IT13_LEICA_register/4609288/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"86e1cbe9220739e6dcbbb5ad832df3a4","mimetype":"application/octet-stream","path":"150816 
dig7.lif","platform_url":"https://ndownloader.figshare.com/files/7495531","sha1":"e18476865fbf40c92366122b56cb1228701c5519","sha256":"d62db427ac1bcb0ca09814a8b665d1c2b09ad1f34a03bf4e589eab6ae316d712","size":277695787,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"4609288","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.4609288.v1","ext_ids":{"doi":"10.6084/m9.figshare.4609288.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"duewk6zaurgxvj2hczupf7zuha"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.4609288.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200210051725","terminal_sha1hex":"eb70db3c20b2361906e99a7dbc25e195bca65e3d","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/DIgestion_7_from_EXP_IT13_LEICA_register/4609288/1"},"total_size":277695787}
+{"archiveorg_item_name":"figshare.com-1425046-v2","cdx":{"datetime":"20200220172238","mimetype":"text/html","sha1b32":"XNELHLDX7EWMB5JA7RLWRIFVYXPDYS5G","sha1hex":"bb48b3ac77f92cc0f520fc5768a0b5c5de3c4ba6","status_code":200,"surt":"com,figshare)/articles/s14_1/1425046/2","url":"https://figshare.com/articles/S14_1/1425046/2","warc_csize":6182,"warc_offset":119019093,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200220164114618-21546-21609-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200220172053663-21589-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"d9419376cf92a582eaa622a22e7ab7a2","mimetype":"text/html","sha1hex":"bb48b3ac77f92cc0f520fc5768a0b5c5de3c4ba6","sha256hex":"c41ee64c91bf32eecd9f4ab317e3f2e1499b6bf8ccd3b100dfa3433896d1eb00","size_bytes":24693},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.1425046.v2","https://figshare.com/articles/S14_1/1425046/2"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"5ad9b122f0e03a458a2ca2b94f88c279","mimetype":"application/zip","path":"S14_1.zip","platform_url":"https://ndownloader.figshare.com/files/2081115","sha1":"0b31d3e7d1e8423195f25f07bc30fcdbbef7eabe","sha256":"cf745f2b7c22bed50706dc57d553bb80e6a5339252bdd0033a6fdef44dfdc853","size":193762539,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"1425046","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.1425046.v2","ext_ids":{"doi":"10.6084/m9.figshare.1425046.v2"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"asbhbykb7bbuzjwmdzp65tlnwu"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.1425046.v2","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200220172238","terminal_sha1hex":"bb48b3ac77f92cc0f520fc5768a0b5c5de3c4ba6","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/S14_1/1
425046/2"},"total_size":193762539}
+{"archiveorg_item_name":"figshare.com-155705-v1","cdx":{"datetime":"20200302100511","mimetype":"text/html","sha1b32":"FOQVA5RJVIRKE27DCV3VMNQXANNNEVVC","sha1hex":"2ba1507629aa22a26be31577563617035ad256a2","status_code":200,"surt":"com,figshare)/articles/genomic_signal_processing_one_scientist_s_quest/155705/1","url":"https://figshare.com/articles/Genomic_Signal_Processing_one_scientist_s_quest/155705/1","warc_csize":6768,"warc_offset":92251214,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200302092906472-35659-35722-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200302100304468-35694-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"838a7da2e12f06b8dac16865e0c9375f","mimetype":"text/html","sha1hex":"2ba1507629aa22a26be31577563617035ad256a2","sha256hex":"d4112a7f0ff3f3cd6113678008c8505d8d7d8ac11ecf014c49f08f6a93f6f27f","size_bytes":30273},"fileset_file":{"cdx":{"datetime":"20200214191603","mimetype":"binary/octet-stream","sha1b32":"WX25SVMOOQYM7QT73YVFQ7HEIFBYF3KR","sha1hex":"b5f5d9558e7430cfc27fde2a587ce4414382ed51","status_code":200,"surt":"com,amazonaws,s3-eu-west-1)/pfigshare-u-files/482151/gsponescientistsquest.pdf","url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/482151/GSPonescientistsquest.pdf","warc_csize":305643,"warc_offset":144050325,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200214182939473-13190-13253-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200214191434645-13242-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_meta":[{"md5hex":"b7cf070a3a21fbf3d96abf31c7331658","mimetype":"application/pdf","sha1hex":"b5f5d9558e7430cfc27fde2a587ce4414382ed51","sha256hex":"a356ae5fca5b8c105ce91f14719a5ec5d40bff31cea59fcf14fd8e0717c45db7","size_bytes":323355}],"terminal":{"terminal_dt":"20200214191603","terminal_status_code":200,"terminal_url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/482151/GSPonescientistsquest.pdf"}},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.155705.v1","https://figshare.com/articles/
Genomic_Signal_Processing_one_scientist_s_quest/155705/1"],"ingest_strategy":"web-file","manifest":[{"md5":"b7cf070a3a21fbf3d96abf31c7331658","mimetype":"application/pdf","path":"GSP-one-scientists-quest.pdf","platform_url":"https://ndownloader.figshare.com/files/482151","sha1":"b7cf070a3a21fbf3d96abf31c7331658","sha256":"a356ae5fca5b8c105ce91f14719a5ec5d40bff31cea59fcf14fd8e0717c45db7","size":323355,"status":"success","terminal_dt":"20200214191603","terminal_url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/482151/GSPonescientistsquest.pdf"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"155705","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.155705.v1","ext_ids":{"doi":"10.6084/m9.figshare.155705.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"b2qxpak6cjbypcipjlsz6fk2ge"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.155705.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200302100511","terminal_sha1hex":"2ba1507629aa22a26be31577563617035ad256a2","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/Genomic_Signal_Processing_one_scientist_s_quest/155705/1"},"total_size":323355}
+{"archiveorg_item_name":"figshare.com-7883471-v1","cdx":{"datetime":"20200223130023","mimetype":"text/html","sha1b32":"BRGI7RR5RNRJR2N34H4SW7RMKYJBQOZR","sha1hex":"0c4c8fc63d8b6298e9bbe1f92b7e2c5612183b31","status_code":200,"surt":"com,figshare)/articles/gvcf_hg01113/7883471/1","url":"https://figshare.com/articles/gVCF_HG01113/7883471/1","warc_csize":6156,"warc_offset":135768315,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200223120354621-25701-25767-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200223125651217-25749-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"098f543205e102a924b402d6fd5aa2bd","mimetype":"text/html","sha1hex":"0c4c8fc63d8b6298e9bbe1f92b7e2c5612183b31","sha256hex":"937a34711a98d783b602a6c005a364d4025d7c211ca1cb9c5e1d9b6239169183","size_bytes":23652},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7883471.v1","https://figshare.com/articles/gVCF_HG01113/7883471/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"b4a1733982a44b1eb1f4ee957eb7bf41","mimetype":"application/x-gzip","path":"HG01113.dedup.realigned.recalibrated.hc.g.vcf.gz","platform_url":"https://ndownloader.figshare.com/files/14675255","sha1":"b907f6e243820a09f10c001900fb99141537ac63","sha256":"ebae318f297acd3ce28f6d7bda92eaf863395a18594908dda016fe2632f7fd38","size":3558964608,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7883471","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7883471.v1","ext_ids":{"doi":"10.6084/m9.figshare.7883471.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"blw4at5lxvhcldwjapqipierkq"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7883471.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200223130023","terminal_sha1hex":"0c4c8fc63d8b6298e9bbe1f92b7e2c5612183b31","terminal_statu
s_code":200,"terminal_url":"https://figshare.com/articles/gVCF_HG01113/7883471/1"},"total_size":3558964608}
+{"archiveorg_item_name":"figshare.com-1328466-v1","cdx":{"datetime":"20200213025244","mimetype":"text/html","sha1b32":"ECWCYMZ2PEZG7ZU2PVVNDTRXDR5BIFCM","sha1hex":"20ac2c333a79326fe69a7d6ad1ce371c7a14144c","status_code":200,"surt":"com,figshare)/articles/pecten_maximus_est_database/1328466/1","url":"https://figshare.com/articles/Pecten_maximus_EST_database/1328466/1","warc_csize":6335,"warc_offset":131119250,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200213020718819-10319-10382-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200213025003763-10366-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"a7d7b5e7e48d883ec67b921b4362bc82","mimetype":"text/html","sha1hex":"20ac2c333a79326fe69a7d6ad1ce371c7a14144c","sha256hex":"60f8da0818a1890aa3a9d01da3c9d36dc4934d9ef8a7df91b492e7a673dd055c","size_bytes":24908},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.1328466.v1","https://figshare.com/articles/Pecten_maximus_EST_database/1328466/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"42204274a103103ef8872ecf5cfb2bdd","mimetype":"text/plain","path":"pecten_maximus.fasta","platform_url":"https://ndownloader.figshare.com/files/1939770","sha1":"b0ac5f991fad6168deceff8cb144dd42f32ed8a7","sha256":"e81abab77b76ad7a4e6271777071391793623d17372fcd3477efd3fa0dd2d541","size":165410129,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"1328466","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.1328466.v1","ext_ids":{"doi":"10.6084/m9.figshare.1328466.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"azywvawlkfdt3g3tewt4zahfca"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.1328466.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200213025244","terminal_sha1hex":"20ac2c333a79326fe69a7d6ad1ce371c7a14144c","termina
l_status_code":200,"terminal_url":"https://figshare.com/articles/Pecten_maximus_EST_database/1328466/1"},"total_size":165410129}
+{"archiveorg_item_name":"figshare.com-7891043-v1","cdx":{"datetime":"20200210043948","mimetype":"text/html","sha1b32":"IRLPGL5NVEK74PJ2KOEYBXPVYUWBCYTW","sha1hex":"4456f32fada915fe3d3a538980ddf5c52c116276","status_code":200,"surt":"com,figshare)/articles/gvcf_na18907/7891043/1","url":"https://figshare.com/articles/gVCF_NA18907/7891043/1","warc_csize":6157,"warc_offset":17501773,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200210041721185-05270-05334-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200210043919713-05302-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"60c2be90859130938e3dae732b511047","mimetype":"text/html","sha1hex":"4456f32fada915fe3d3a538980ddf5c52c116276","sha256hex":"cdc0f95c1127389881aae42178f018b2fd373b7c46e34c7656d2085d17c3cad7","size_bytes":23652},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7891043.v1","https://figshare.com/articles/gVCF_NA18907/7891043/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"8b7bb4984f8cb05a971df28b1188601f","mimetype":"application/x-gzip","path":"NA18907.dedup.realigned.recalibrated.hc.g.vcf.gz","platform_url":"https://ndownloader.figshare.com/files/14831225","sha1":"3abe9b16a7dd03f1872b1f7e60230cc2472f5363","sha256":"b8520f5b51fd3c8b9540a2fae43546441a860548fe696e05ca94bde4e4d7e55a","size":5080602128,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7891043","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7891043.v1","ext_ids":{"doi":"10.6084/m9.figshare.7891043.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"doq6plp2pfbbjaekby6qpboclq"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7891043.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200210043948","terminal_sha1hex":"4456f32fada915fe3d3a538980ddf5c52c116276","terminal_status
_code":200,"terminal_url":"https://figshare.com/articles/gVCF_NA18907/7891043/1"},"total_size":5080602128}
+{"archiveorg_item_name":"figshare.com-7944293-v1","cdx":{"datetime":"20200219161852","mimetype":"text/html","sha1b32":"BPOM2XH7LDGXBZXHOFI7R2X5G6DV6JSF","sha1hex":"0bdccd5cff58cd70e6e77151f8eafd37875f2645","status_code":200,"surt":"com,figshare)/articles/gvcf_na18504/7944293/1","url":"https://figshare.com/articles/gVCF_NA18504/7944293/1","warc_csize":6161,"warc_offset":47228503,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200219151726124-20609-20673-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200219161738665-20670-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"388ca467f21de95c62fab428ff1f9594","mimetype":"text/html","sha1hex":"0bdccd5cff58cd70e6e77151f8eafd37875f2645","sha256hex":"db5c1adaca1b0466b3eac7d15b9ea348360e0d31e3be062f2301b8563428a740","size_bytes":23648},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7944293.v1","https://figshare.com/articles/gVCF_NA18504/7944293/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"52473c022862137bd2f0262ff5654763","mimetype":"application/x-gzip","path":"NA18504.dedup.realigned.recalibrated.hc.g.vcf.gz","platform_url":"https://ndownloader.figshare.com/files/14789654","sha1":"b0a9dfe52fba77755eef81ea5ca1224218495975","sha256":"0931bb9376f0a95494b9f78e1bfb195e75f931b5ed8c53253ec6eb9acbea4165","size":9241364045,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7944293","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7944293.v1","ext_ids":{"doi":"10.6084/m9.figshare.7944293.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"dokkkeowufarnc7hc2lictjfpa"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7944293.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200219161852","terminal_sha1hex":"0bdccd5cff58cd70e6e77151f8eafd37875f2645","terminal_status
_code":200,"terminal_url":"https://figshare.com/articles/gVCF_NA18504/7944293/1"},"total_size":9241364045}
+{"archiveorg_item_name":"figshare.com-96798-v1","cdx":{"datetime":"20200216095334","mimetype":"text/html","sha1b32":"EZJKDVBIPTSAZB43VHRKL6IXQ2UBLJRC","sha1hex":"2652a1d4287ce40c879ba9e2a5f91786a815a622","status_code":200,"surt":"com,figshare)/articles/fuelwood,_savannas,_and_climate_change:_integrating_modeling,_field_experimentation,_and_optical_and_radar_remote_sensing/96798/1","url":"https://figshare.com/articles/Fuelwood,_Savannas,_and_Climate_Change:_Integrating_Modeling,_Field_Experimentation,_and_Optical_and_Radar_Remote_Sensing/96798/1","warc_csize":6488,"warc_offset":121408362,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200216090256643-15150-15213-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200216095021545-15201-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"9733a4d39f63227597b185510119c53d","mimetype":"text/html","sha1hex":"2652a1d4287ce40c879ba9e2a5f91786a815a622","sha256hex":"6352c8db9c37c4ff806d673a0adc204e70e904156b8caae61065bc4db2dc852f","size_bytes":27330},"fileset_file":{"cdx":{"datetime":"20200221054157","mimetype":"binary/octet-stream","sha1b32":"A6NUHHMLZR4ADDLH7WBXXDHDBVVO2POU","sha1hex":"079b439d8bcc78018d67fd837b8ce30d6aed3dd4","status_code":200,"surt":"com,amazonaws,s3-eu-west-1)/pfigshare-u-files/101362/final_nessf11_tredennick.pdf","url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/101362/FINAL_NESSF11_Tredennick.pdf","warc_csize":2831911,"warc_offset":148846417,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200221042341296-22665-22731-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200221053650453-22721-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_meta":[{"md5hex":"955ed359be13ad13d4ab39cc6e0d1795","mimetype":"application/pdf","sha1hex":"079b439d8bcc78018d67fd837b8ce30d6aed3dd4","sha256hex":"6660a07237da9050e1ed168b491f87057af5cd33e3cbe934ea2cb26f8c125d64","size_bytes":3141366}],"terminal":{"terminal_dt":"20200221054157","terminal_status_code":200,"terminal_url":"https://s3-eu-west-1.amazonaws.co
m/pfigshare-u-files/101362/FINAL_NESSF11_Tredennick.pdf"}},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.96798.v1","https://figshare.com/articles/Fuelwood,_Savannas,_and_Climate_Change:_Integrating_Modeling,_Field_Experimentation,_and_Optical_and_Radar_Remote_Sensing/96798/1"],"ingest_strategy":"web-file","manifest":[{"md5":"955ed359be13ad13d4ab39cc6e0d1795","mimetype":"application/pdf","path":"FINAL_NESSF11_Tredennick.pdf","platform_url":"https://ndownloader.figshare.com/files/101362","sha1":"955ed359be13ad13d4ab39cc6e0d1795","sha256":"6660a07237da9050e1ed168b491f87057af5cd33e3cbe934ea2cb26f8c125d64","size":3141366,"status":"success","terminal_dt":"20200221054157","terminal_url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/101362/FINAL_NESSF11_Tredennick.pdf"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"96798","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.96798.v1","ext_ids":{"doi":"10.6084/m9.figshare.96798.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"do72fzxjlzhpjjub7pscjymvde"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.96798.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200216095334","terminal_sha1hex":"2652a1d4287ce40c879ba9e2a5f91786a815a622","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/Fuelwood,_Savannas,_and_Climate_Change:_Integrating_Modeling,_Field_Experimentation,_and_Optical_and_Radar_Remote_Sensing/96798/1"},"total_size":3141366}
+{"archiveorg_item_name":"figshare.com-7059653-v1","cdx":{"datetime":"20200219071230","mimetype":"text/html","sha1b32":"PXELMU4HEPJASJLGBIPTWWJ7R3LVTMAA","sha1hex":"7dc8b6538723d20925660a1f3b593f8ed759b000","status_code":200,"surt":"com,figshare)/articles/gwr_air_temperature_for_year_2012/7059653/1","url":"https://figshare.com/articles/GWR_air_temperature_for_year_2012/7059653/1","warc_csize":6373,"warc_offset":82312661,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200219062845359-19514-19579-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200219071046185-19559-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"030ab36d3053dcaa7ad14afb9eec9ab9","mimetype":"text/html","sha1hex":"7dc8b6538723d20925660a1f3b593f8ed759b000","sha256hex":"760244a3cdca50ee50a41b3285ad7f3dff6045eed4f72e61f8387acaf9c1c20d","size_bytes":25691},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7059653.v1","https://figshare.com/articles/GWR_air_temperature_for_year_2012/7059653/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"91ae25cbb68a92d067da63c70855e979","mimetype":"application/x-hdf","path":"Tair_GWR_2012.nc","platform_url":"https://ndownloader.figshare.com/files/12983516","sha1":"b0bb6b49c276bea61e90dfb553013bc7b0ae6e0f","sha256":"c7b9aba099374b613e636d0c84f321b1301834f77ba346f9b7aa8d0626134b55","size":168632217,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7059653","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7059653.v1","ext_ids":{"doi":"10.6084/m9.figshare.7059653.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"aq5xmvnwz5anhmumsvlnc3b7qi"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7059653.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200219071230","terminal_sha1hex":"7dc8b6538723d20925660a1f3b593
f8ed759b000","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/GWR_air_temperature_for_year_2012/7059653/1"},"total_size":168632217}
+{"archiveorg_item_name":"figshare.com-1290730-v1","cdx":{"datetime":"20200218045018","mimetype":"text/html","sha1b32":"DIGYAYMD2GXMU4SKQIO4QGJFBIR37VVA","sha1hex":"1a0d806183d1aeca724a821dc819250a23bfd6a0","status_code":200,"surt":"com,figshare)/articles/difficult_airway_in_thyroid_surgery/1290730/1","url":"https://figshare.com/articles/Difficult_airway_in_thyroid_surgery/1290730/1","warc_csize":6943,"warc_offset":67583985,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200218040911124-17891-17955-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200218044808327-17934-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"24b87928eb315579ce5263884b4bcbec","mimetype":"text/html","sha1hex":"1a0d806183d1aeca724a821dc819250a23bfd6a0","sha256hex":"4fbafb454edccba5ab0ebaf90e40f6709ce99d9326ab9a18e2325a02f558c1be","size_bytes":32119},"fileset_file":{"cdx":{"datetime":"20200218045039","mimetype":"binary/octet-stream","sha1b32":"RS5T57Z2H6ZIMYM35YSPSK6QYJXJ5EEF","sha1hex":"8cbb3eff3a3fb286619bee24f92bd0c26e9e9085","status_code":200,"surt":"com,amazonaws,s3-eu-west-1)/pfigshare-u-files/1867824/revistaorl201118_viaaereatiroides.pdf","url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/1867824/revistaorl201118_viaaereatiroides.pdf","warc_csize":356344,"warc_offset":57825647,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200218040911124-17891-17955-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200218044857403-17936-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_meta":[{"md5hex":"bdc2ca5f3b996c24890ee706a5d91fce","mimetype":"application/pdf","sha1hex":"8cbb3eff3a3fb286619bee24f92bd0c26e9e9085","sha256hex":"9acc5aac353c6ff40fb1792d5a41b2f4306bfb75f016965100b94a07de24c5e7","size_bytes":410344}],"terminal":{"terminal_dt":"20200218045039","terminal_status_code":200,"terminal_url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/1867824/revistaorl201118_viaaereatiroides.pdf"}},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.1290730.v1","https://figs
hare.com/articles/Difficult_airway_in_thyroid_surgery/1290730/1"],"html_biblio":{"abstract":"Background: Thyroid surgery has been considered as a risk factor for the management of difficult airway. Method: We review the relevant literature related to the difficult airway and thyroid surgery. Comments: Endothoracic goiter, deformity of the airway, compression of the airway and thyroid cancer do not appear to have an influence on the difficulty for intubation. Classical parameters have a better predictive value: mouth opening less than 35 mm, Mallampati II-IV, short neck, neck movility less than 80 degrees, thyromental distance less than 60 mm, retrognathia, Cormack index III-IV, macroglossia, inability to protude the teeth and body mass index (BMI) greater than 35 Kg/m2. Conclusions: According to the literature, thyroid disease does not increase the risk for a difficult airway (ventilation / intubation). The risk of an unexpected difficult airway (false negative) cannot be eliminated. All patients must undergo an assessment of the airway","container_name":"figshare","contrib_names":["Javier CuelloAzcárate, Jesús","Pardal-Refoyo, Jose Luis","Almudena Romero-Rapado, ","Pilar Toro-Pinilla, María"],"doi":"10.6084/m9.figshare.1290730.v1","pdf_fulltext_url":"https://figshare.com/articles/Difficult_airway_in_thyroid_surgery/1290730/files/1867824.pdf","publisher":"figshare","release_date":"2015-01-18","title":"Difficult airway in thyroid 
surgery"},"ingest_strategy":"web-file","manifest":[{"md5":"bdc2ca5f3b996c24890ee706a5d91fce","mimetype":"application/pdf","path":"revistaorl201118_viaaereatiroides.pdf","platform_url":"https://ndownloader.figshare.com/files/1867824","sha1":"bdc2ca5f3b996c24890ee706a5d91fce","sha256":"9acc5aac353c6ff40fb1792d5a41b2f4306bfb75f016965100b94a07de24c5e7","size":410344,"status":"success","terminal_dt":"20200218045039","terminal_url":"https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/1867824/revistaorl201118_viaaereatiroides.pdf"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"1290730","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.1290730.v1","ext_ids":{"doi":"10.6084/m9.figshare.1290730.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"eftssxll5ngipgmue5ahnsan3i"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.1290730.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200218045018","terminal_sha1hex":"1a0d806183d1aeca724a821dc819250a23bfd6a0","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/Difficult_airway_in_thyroid_surgery/1290730/1"},"total_size":410344}
+{"archiveorg_item_name":"figshare.com-7879991-v1","cdx":{"datetime":"20200301215510","mimetype":"text/html","sha1b32":"IINKFPDJIAXYVLDYVUXSO6EGJPNEKR6P","sha1hex":"421aa2bc69402f8aac78ad2f2778864bda4547cf","status_code":200,"surt":"com,figshare)/articles/gvcf_hg00530/7879991/1","url":"https://figshare.com/articles/gVCF_HG00530/7879991/1","warc_csize":6343,"warc_offset":45356,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200301213356805-35009-35074-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200301215511721-35022-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"2a01939b84dabdaa28ceb363d0319442","mimetype":"text/html","sha1hex":"421aa2bc69402f8aac78ad2f2778864bda4547cf","sha256hex":"7b5f2f51bad52c73d7325d991fc77b7786957b1a9b356fb5bf58f5efa402a9a4","size_bytes":26774},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7879991.v1","https://figshare.com/articles/gVCF_HG00530/7879991/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"86c5baf03e03173d200e2e1a1cc6fd79","mimetype":"application/x-gzip","path":"HG00530.dedup.realigned.recalibrated.hc.g.vcf.gz","platform_url":"https://ndownloader.figshare.com/files/14670497","sha1":"c3f0cc4b4d65b730d7b4cafcfc621d187f301ba0","sha256":"124f0fd047046e892275d862643e9814c8db63ef5cff8c7d44c25ec8c8d73841","size":5957749616,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7879991","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7879991.v1","ext_ids":{"doi":"10.6084/m9.figshare.7879991.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"ccata4ww7nekflu2l5rhtn3b3i"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7879991.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200301215510","terminal_sha1hex":"421aa2bc69402f8aac78ad2f2778864bda4547cf","terminal_status_co
de":200,"terminal_url":"https://figshare.com/articles/gVCF_HG00530/7879991/1"},"total_size":5957749616}
+{"archiveorg_item_name":"figshare.com-4007418-v2","cdx":{"datetime":"20200212070037","mimetype":"text/html","sha1b32":"CFQUF74PASKLKUQYTY5MCOP3THNV4J2S","sha1hex":"116142ff8f0494b552189e3ac139fb99db5e2752","status_code":200,"surt":"com,figshare)/articles/ch4_pasr_data_bin/4007418/2","url":"https://figshare.com/articles/ch4_pasr_data_bin/4007418/2","warc_csize":7001,"warc_offset":116323768,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200212065257603-08978-09042-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200212065830590-08986-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"bdab1e11a9e10714992091fa4db91935","mimetype":"text/html","sha1hex":"116142ff8f0494b552189e3ac139fb99db5e2752","sha256hex":"77562652fa99ec2223b3daec53818e2024e98c191207595bccc9f35f96da0e0a","size_bytes":31495},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.4007418.v2","https://figshare.com/articles/ch4_pasr_data_bin/4007418/2"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"7a4bdb4b9329e2937db5b5639ebee7c4","mimetype":"application/zip","path":"ch4_pasr_data.npz","platform_url":"https://ndownloader.figshare.com/files/6463836","sha1":"7e496df234c2d4aa9adbadac99a2009787cd09ef","sha256":"3cf46e8198343fc25c50c1f047c69ac0672cd333f97d355726ac2f6d65e24524","size":202004950,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"4007418","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.4007418.v2","ext_ids":{"doi":"10.6084/m9.figshare.4007418.v2"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"bub6fa6iljfbfaguuxu36etywq"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.4007418.v2","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200212070037","terminal_sha1hex":"116142ff8f0494b552189e3ac139fb99db5e2752","terminal_status_code":200,"terminal
_url":"https://figshare.com/articles/ch4_pasr_data_bin/4007418/2"},"total_size":202004950}
+{"archiveorg_item_name":"figshare.com-8280893-v1","cdx":{"datetime":"20200228033059","mimetype":"text/html","sha1b32":"JCV325BG2DKHSLOXR4VKRVYUJX3LN6ML","sha1hex":"48abbd7426d0d4792dd78f2aa8d7144df6b6f98b","status_code":200,"surt":"com,figshare)/articles/dataset_during_rhythmic_pointing_joint-angles_are_temporary_linked_in_different_synergies_with_similar_relative_joint-angle_coupling_across_task_constraints_/8280893/1","url":"https://figshare.com/articles/Dataset_During_rhythmic_pointing_joint-angles_are_temporary_linked_in_different_synergies_with_similar_relative_joint-angle_coupling_across_task_constraints_/8280893/1","warc_csize":6670,"warc_offset":84669003,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200228023255589-30612-30678-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200228032853298-30662-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"038f0c60fd6b79dfc3ef26d89a74f68f","mimetype":"text/html","sha1hex":"48abbd7426d0d4792dd78f2aa8d7144df6b6f98b","sha256hex":"a7dba4ca5e575b0bdfe2f9d944b157c4b057c5257d2062a67e007da4d9ffa70e","size_bytes":31199},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.8280893.v1","https://figshare.com/articles/Dataset_During_rhythmic_pointing_joint-angles_are_temporary_linked_in_different_synergies_with_similar_relative_joint-angle_coupling_across_task_constraints_/8280893/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"12c3ff84915fc677fef4126cbcf3f52d","mimetype":"application/x-rar","path":"Data Valk et al 2019 
SciReports.rar","platform_url":"https://ndownloader.figshare.com/files/15482963","sha1":"fb1d8152c7dd6048f652e1890a6bff6a679e0cea","sha256":"69fc117e07121e2a43320b6a24cf04ffc41d28069a142ae8b44a79b2b52f2320","size":1320490575,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"8280893","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.8280893.v1","ext_ids":{"doi":"10.6084/m9.figshare.8280893.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"db6v4erinjcllo6mq5wmxlvqby"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.8280893.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200228033059","terminal_sha1hex":"48abbd7426d0d4792dd78f2aa8d7144df6b6f98b","terminal_status_code":200,"terminal_url":"https://figshare.com/articles/Dataset_During_rhythmic_pointing_joint-angles_are_temporary_linked_in_different_synergies_with_similar_relative_joint-angle_coupling_across_task_constraints_/8280893/1"},"total_size":1320490575}
+{"archiveorg_item_name":"figshare.com-7885169-v1","cdx":{"datetime":"20200220015119","mimetype":"text/html","sha1b32":"7IBKPBR3AO5XRAAAW77NGEOUUMS3BUP3","sha1hex":"fa02a7863b03bb788000b7fed311d4a325b0d1fb","status_code":200,"surt":"com,figshare)/articles/gvcf_hg01256/7885169/1","url":"https://figshare.com/articles/gVCF_HG01256/7885169/1","warc_csize":6154,"warc_offset":43108844,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200220014610521-21205-21270-wbgrp-svc280/DATACITE-DOI-CRAWL-2020-01-20200220015026708-21210-3211~wbgrp-svc280.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"fb96082852b6c510739ca4e3dfc04d4a","mimetype":"text/html","sha1hex":"fa02a7863b03bb788000b7fed311d4a325b0d1fb","sha256hex":"69db668dda74567b7868e74d35695cccf6bc61a43b205cca7014f7b5829ef806","size_bytes":23648},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7885169.v1","https://figshare.com/articles/gVCF_HG01256/7885169/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"b4ae15f84e5bb15a1856df21071f601b","mimetype":"application/x-gzip","path":"HG01256.dedup.realigned.recalibrated.hc.g.vcf.gz","platform_url":"https://ndownloader.figshare.com/files/14677301","sha1":"2d6b4b16baf60eaf5c64f6ea9fb8aec8ab8c8e86","sha256":"1d7c859b2cf0dce0536520d434992e180002d193fd7b650a1d501c4fe491b990","size":5896179103,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7885169","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7885169.v1","ext_ids":{"doi":"10.6084/m9.figshare.7885169.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"dlsawja46fahtn7llhrhaeh6gu"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7885169.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200220015119","terminal_sha1hex":"fa02a7863b03bb788000b7fed311d4a325b0d1fb","terminal_status
_code":200,"terminal_url":"https://figshare.com/articles/gVCF_HG01256/7885169/1"},"total_size":5896179103}
+{"archiveorg_item_name":"figshare.com-1504026-v1","cdx":{"datetime":"20200302200059","mimetype":"text/html","sha1b32":"DUX3N7QY5EQNYO3G6P3AETT7J3FMGECQ","sha1hex":"1d2fb6fe18e920dc3b66f3f6024e7f4ecac31050","status_code":200,"surt":"com,figshare)/articles/fish_proj800_trans_part3/1504026/1","url":"https://figshare.com/articles/FISH_PROJ800_TRANS_part3/1504026/1","warc_csize":7072,"warc_offset":788146,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200302190810451-34977-35042-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200302200055284-35013-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"679e6dc9a4b89ee8012a994bc53f53d1","mimetype":"text/html","sha1hex":"1d2fb6fe18e920dc3b66f3f6024e7f4ecac31050","sha256hex":"432fbd760f0e56f9185fb83f41acdb92c15fbaaa14c633e50f70ded61d55cf4a","size_bytes":33179},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.1504026.v1","https://figshare.com/articles/FISH_PROJ800_TRANS_part3/1504026/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"ae49f8baacd5ca26fef939db1afe33f3","mimetype":"application/zip","path":"FISH_PROJ800_TRANS_part3.zip","platform_url":"https://ndownloader.figshare.com/files/3378803","sha1":"9fb06893afb498be679c959f77f6f0f618247d54","sha256":"2bb53ab934ba4de9706ee1f9dc08fd30ccd3746e41b39aedf716cc1d5dc33e49","size":200446950,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"1504026","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.1504026.v1","ext_ids":{"doi":"10.6084/m9.figshare.1504026.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"c65bw4qtcjgoto7kqgklyp5fve"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.1504026.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200302200059","terminal_sha1hex":"1d2fb6fe18e920dc3b66f3f6024e7f4ecac31050","termin
al_status_code":200,"terminal_url":"https://figshare.com/articles/FISH_PROJ800_TRANS_part3/1504026/1"},"total_size":200446950}
+{"archiveorg_item_name":"figshare.com-7895180-v1","cdx":{"datetime":"20200224231540","mimetype":"text/html","sha1b32":"WNSGMXOO3NNYUBLRFOG33B7GJYXRBKPT","sha1hex":"b364665dcedb5b8a05712b8dbd87e64e2f10a9f3","status_code":200,"surt":"com,figshare)/articles/gvcf_hg03190/7895180/1","url":"https://figshare.com/articles/gVCF_HG03190/7895180/1","warc_csize":6348,"warc_offset":146971019,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200224223743553-26853-26919-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200224231109634-26881-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"c6912cf73aa77856598d7ec0391235d4","mimetype":"text/html","sha1hex":"b364665dcedb5b8a05712b8dbd87e64e2f10a9f3","sha256hex":"9060112d112f26bff392fd895c37629b0b30b859a33ab80c98bf195d63ebed47","size_bytes":26766},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7895180.v1","https://figshare.com/articles/gVCF_HG03190/7895180/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"36307bca6fc019e5f0faff1d5264c3c1","mimetype":"application/x-gzip","path":"HG03190.dedup.realigned.recalibrated.hc.g.vcf.gz","platform_url":"https://ndownloader.figshare.com/files/14702432","sha1":"9a259e2741612b1b0611dc5b4b6400ca6f74b063","sha256":"707c614fdfd274f64d4b7e82596a157f0b3faecf1c6157aabd0a4cc61bc6af23","size":5148586592,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7895180","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7895180.v1","ext_ids":{"doi":"10.6084/m9.figshare.7895180.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"cfmk2v7sdjfurerhfj7zooij6u"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7895180.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200224231540","terminal_sha1hex":"b364665dcedb5b8a05712b8dbd87e64e2f10a9f3","terminal_statu
s_code":200,"terminal_url":"https://figshare.com/articles/gVCF_HG03190/7895180/1"},"total_size":5148586592}
+{"archiveorg_item_name":"figshare.com-7932278-v1","cdx":{"datetime":"20200208045817","mimetype":"text/html","sha1b32":"TK2EV6VFMRYO4DXSTQYGJHAWYV3AAPWS","sha1hex":"9ab44afaa56470ee0ef29c30649c16c576003ed2","status_code":200,"surt":"com,figshare)/articles/gvcf_na19795/7932278/1","url":"https://figshare.com/articles/gVCF_NA19795/7932278/1","warc_csize":6158,"warc_offset":30962659,"warc_path":"DATACITE-DOI-CRAWL-2020-01-20200208045034449-01427-01489-wbgrp-svc281/DATACITE-DOI-CRAWL-2020-01-20200208045800643-01437-3252~wbgrp-svc281.us.archive.org~8443.warc.gz"},"file_count":1,"file_meta":{"md5hex":"79db15c64560af1da31510b50b706c6d","mimetype":"text/html","sha1hex":"9ab44afaa56470ee0ef29c30649c16c576003ed2","sha256hex":"96ff0c36ace08a420d50079ec96be766c56750e8bd1a93604e415aa149d96a57","size_bytes":23648},"fileset_file":{},"hit":true,"hops":["https://doi.org/10.6084/m9.figshare.7932278.v1","https://figshare.com/articles/gVCF_NA19795/7932278/1"],"ingest_strategy":"archiveorg-file","manifest":[{"md5":"7a9dc016f5b4d369a5a2f2353e57bcb1","mimetype":"application/x-gzip","path":"NA19795.dedup.realigned.recalibrated.hc.g.vcf.gz","platform_url":"https://ndownloader.figshare.com/files/14760185","sha1":"df6ff3a1378a6538aacf8aa06da6d12cd45b94b8","sha256":"3ccb1c811851f5dff59227f2f3deaaa88aeb0c56fcb81eaa3f01a7bdf3546062","size":5574989369,"status":"success"}],"platform_base_url":null,"platform_domain":"figshare.com","platform_id":"7932278","platform_name":"figshare","request":{"base_url":"https://doi.org/10.6084/m9.figshare.7932278.v1","ext_ids":{"doi":"10.6084/m9.figshare.7932278.v1"},"fatcat":{"release_ident":"aaaaaaaaaaaaarceaaaaaaaaam","work_ident":"byoabgtkdvhatpx7sqievxqktq"},"ingest_request_source":"fatcat-ingest","ingest_type":"dataset","link_source":"doi","link_source_id":"10.6084/m9.figshare.7932278.v1","release_stage":"published"},"status":"success-file","terminal":{"terminal_dt":"20200208045817","terminal_sha1hex":"9ab44afaa56470ee0ef29c30649c16c576003ed2","terminal_status
_code":200,"terminal_url":"https://figshare.com/articles/gVCF_NA19795/7932278/1"},"total_size":5574989369}
diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py
index 44dd5a0b..8f8a34c7 100644
--- a/python/tests/import_ingest.py
+++ b/python/tests/import_ingest.py
@@ -5,6 +5,7 @@ from fixtures import *
from fatcat_tools.importers import (
IngestFileResultImporter,
+ IngestFilesetFileResultImporter,
IngestFilesetResultImporter,
IngestWebResultImporter,
JsonLinePusher,
@@ -26,6 +27,11 @@ def ingest_fileset_importer(api):
yield IngestFilesetResultImporter(api)
+# Function-scoped fixture: yields an IngestFilesetFileResultImporter constructed
+# from the `api` fixture, with no other arguments passed.
+@pytest.fixture(scope="function")
+def ingest_fileset_file_importer(api):
+ yield IngestFilesetFileResultImporter(api)
+
+
# TODO: use API to check that entities actually created...
def test_ingest_importer_basic(ingest_importer):
with open("tests/files/example_ingest.json", "r") as f:
@@ -238,3 +244,57 @@ def test_ingest_fileset_importer(ingest_fileset_importer):
assert counts["exists"] == 7
assert counts["skip"] == 13
assert counts["skip-release-not-found"] == 13
+
+
+# Parses a single record from the example fileset-file ingest fixture with
+# parse_record() and checks hashes, mimetype, size, path, URLs, and the number
+# of release ids on the returned entity.
+def test_ingest_fileset_file_dict_parse(ingest_fileset_file_importer):
+ with open("tests/files/example_fileset_file_ingest_result.json", "r") as f:
+ # only the first line of the JSON-lines fixture is read and parsed
+ raw = json.loads(f.readline())
+ fe = ingest_fileset_file_importer.parse_record(raw)
+ assert fe.sha1 == "6fb020064da66bb7a666c17555611cf6820fc9ae"
+ assert fe.md5 == "dfc41b617564f99a12e6077a6208876f"
+ assert fe.sha256 == "2febad53ff0f163a18d7cbb913275bf99ed2544730cda191458837e2b0da9d18"
+ assert fe.mimetype == "image/tiff"
+ assert fe.size == 410631015
+ assert fe.extra["path"] == "NDVI_Diff_1990_2018_T06.tif"
+ assert len(fe.urls) == 2
+ # URLs are checked by `rel`; a URL whose rel is neither "repository" nor
+ # "archive" passes through this loop without any assertion
+ for u in fe.urls:
+ if u.rel == "repository":
+ assert u.url == "https://ndownloader.figshare.com/files/14460875"
+ if u.rel == "archive":
+ assert (
+ u.url
+ == "https://archive.org/download/springernature.figshare.com-7767695-v1/NDVI_Diff_1990_2018_T06.tif"
+ )
+ assert len(fe.release_ids) == 1
+
+
+# Pushes the whole example fixture through the importer twice via
+# JsonLinePusher: first with bezerk_mode on, then (after reset()) with
+# bezerk_mode off, checking the returned counts each time.
+def test_ingest_fileset_file_importer(ingest_fileset_file_importer):
+ """
+ Similar to the above, but specifically tests 'file'/'success-file' import pathway
+ """
+ # changelog index recorded before importing (presumably the newest entry);
+ # used below to look up the entry at last_index + 1
+ last_index = ingest_fileset_file_importer.api.get_changelog(limit=1)[0].index
+ with open("tests/files/example_fileset_file_ingest_result.json", "r") as f:
+ ingest_fileset_file_importer.bezerk_mode = True
+ counts = JsonLinePusher(ingest_fileset_file_importer, f).run()
+ # first pass: all 4 skips are expected to be "skip-bad-hashes"
+ # NOTE(review): presumably these are fixture rows whose manifest "sha1" is
+ # not a 40-character hex digest — confirm against the importer
+ assert counts["insert"] == 16
+ assert counts["exists"] == 0
+ assert counts["skip"] == 4
+ assert counts["skip-bad-hashes"] == 4
+
+ # fetch most recent editgroup
+ # (the changelog entry right after last_index; presumably created by the
+ # first pass above — TODO confirm)
+ change = ingest_fileset_file_importer.api.get_changelog_entry(index=last_index + 1)
+ eg = change.editgroup
+ assert eg.description
+ assert "crawled from web" in eg.description.lower()
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.IngestFilesetFileResultImporter" in eg.extra["agent"]
+
+ # re-insert; should skip
+ # second pass: the 16 records counted as "insert" above are now expected
+ # under "exists"; the skip counts are expected to be unchanged
+ with open("tests/files/example_fileset_file_ingest_result.json", "r") as f:
+ ingest_fileset_file_importer.reset()
+ ingest_fileset_file_importer.bezerk_mode = False
+ counts = JsonLinePusher(ingest_fileset_file_importer, f).run()
+ assert counts["insert"] == 0
+ assert counts["exists"] == 16
+ assert counts["skip"] == 4
+ assert counts["skip-bad-hashes"] == 4