From 826c7538e091fac14d987a3cd654975da964e240 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 27 Oct 2021 18:50:17 -0700 Subject: make fmt (black 21.9b0) --- python/scripts/archiveorg_fileset.py | 99 ++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 49 deletions(-) (limited to 'python/scripts/archiveorg_fileset.py') diff --git a/python/scripts/archiveorg_fileset.py b/python/scripts/archiveorg_fileset.py index 83c04e3..6328f52 100755 --- a/python/scripts/archiveorg_fileset.py +++ b/python/scripts/archiveorg_fileset.py @@ -16,32 +16,31 @@ from typing import Any import internetarchive FORMAT_TO_MIMETYPE = { - 'BZIP': 'application/x-bzip', - 'BZIP2': 'application/x-bzip2', - 'ZIP': 'application/zip', - 'GZIP': 'application/gzip', - 'RAR': 'application/vnd.rar', - 'TAR': 'application/x-tar', - '7z': 'application/x-7z-compressed', - 'HTML': 'text/html', - 'Text': 'text/plain', - 'PDF': 'application/pdf', - 'CSV': 'text/csv', - 'XML': 'application/xml', - 'JSON': 'application/json', - + "BZIP": "application/x-bzip", + "BZIP2": "application/x-bzip2", + "ZIP": "application/zip", + "GZIP": "application/gzip", + "RAR": "application/vnd.rar", + "TAR": "application/x-tar", + "7z": "application/x-7z-compressed", + "HTML": "text/html", + "Text": "text/plain", + "PDF": "application/pdf", + "CSV": "text/csv", + "XML": "application/xml", + "JSON": "application/json", #'application/msword (.doc)', # .doc #'application/vnd.openxmlformats-officedocument.wordprocessingml.document', # .docx #'application/vnd.ms-excel', # .xls #'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', # .xlsx - 'MP3': 'audio/mpeg', # .mp3 - 'MP4': 'video/mp4', # .mp4 - 'MPEG': 'video/mpeg', # .mpeg - 'JPEG': 'image/jpeg', - 'GIF': 'image/gif', - 'PNG': 'image/png', - 'TIFF': 'image/tiff', - 'Unknown': None, + "MP3": "audio/mpeg", # .mp3 + "MP4": "video/mp4", # .mp4 + "MPEG": "video/mpeg", # .mpeg + "JPEG": "image/jpeg", + "GIF": "image/gif", + "PNG": "image/png", + "TIFF": "image/tiff", + "Unknown": None, } @@ -49,22 +48,22 @@ def want_file(f: dict, item_name: str) -> bool: """ Filters IA API files """ - if f.source != 'original': + if f.source != "original": return False for suffix in [ - '_meta.sqlite', - '_archive.torrent', - '_itemimage.jpg', - '_meta.xml', - '_thumb.png', - '_files.xml', + "_meta.sqlite", + "_archive.torrent", + "_itemimage.jpg", + "_meta.xml", + "_thumb.png", + "_files.xml", ]: if f.name == item_name + suffix or f.name == item_name.lower() + suffix: return False - if f.name.startswith('_'): + if f.name.startswith("_"): return False - if item_name.startswith('academictorrents_'): - for suffix in ['_academictorrents.torrent', '_academictorrents_torrent.txt', '.bib']: + if item_name.startswith("academictorrents_"): + for suffix in ["_academictorrents.torrent", "_academictorrents_torrent.txt", ".bib"]: if f.name == item_name + suffix: return False return True @@ -77,36 +76,38 @@ def parse_file(f: dict) -> dict: assert f.name and f.sha1 and f.md5 assert f.name is not None mf = { - 'path': f.name, - 'size': int(f.size), - 'sha1': f.sha1, - 'md5': f.md5, + "path": f.name, + "size": int(f.size), + "sha1": f.sha1, + "md5": f.md5, } # TODO: will disable this hard check eventually and replace with: - #mimetype = FORMAT_TO_MIMETYPE.get(f.format) + # mimetype = FORMAT_TO_MIMETYPE.get(f.format) mimetype = FORMAT_TO_MIMETYPE[f.format] if mimetype: - mf['extra'] = dict(mimetype=mimetype) + mf["extra"] = dict(mimetype=mimetype) return mf def item_to_fileset(item_name: str, release_id: str, session: internetarchive.ArchiveSession): print(f"processing item={item_name} release_id={release_id}", file=sys.stderr) - if release_id.startswith('release_'): + if release_id.startswith("release_"): release_id = release_id[9:] assert len(release_id) == 26 item = session.get_item(item_name) - assert item.metadata['mediatype'] not in ['collection', 'web'] + assert item.metadata["mediatype"] not in ["collection", "web"] item_files = item.get_files(on_the_fly=False) manifest = [parse_file(f) for f in item_files if want_file(f, item_name)] fileset = { - 'manifest': manifest, - 'urls': [{ - 'rel': 'archive', - 'url': f'https://archive.org/download/{item_name}/', - }, ], - 'release_ids': [release_id], - #extra={}, + "manifest": manifest, + "urls": [ + { + "rel": "archive", + "url": f"https://archive.org/download/{item_name}/", + }, + ], + "release_ids": [release_id], + # extra={}, } print(json.dumps(fileset)) return fileset @@ -123,12 +124,12 @@ def main(): line = line.strip() if not line: continue - fields = line.split('\t') + fields = line.split("\t") assert len(fields) == 2 item_name = fields[0] release_id = fields[1] item_to_fileset(item_name, release_id=release_id, session=session) -if __name__ == '__main__': +if __name__ == "__main__": main() -- cgit v1.2.3