fileset ingest small tweaks

author: Bryan Newbold <bnewbold@robocracy.org> 2021-10-12 14:44:44 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-10-14 18:11:12 -0700
commit: 5eccb38074104960d88df00805d0ebd7ecf839f9 (patch)
tree: a11b298e49e73686a5afc7bc7b34e1d0b0a86a79 /python/fatcat_tools
parent: 75baf7d423a2cb119bd485672a00fd664e32537c (diff)
download: fatcat-5eccb38074104960d88df00805d0ebd7ecf839f9.tar.gz
fatcat-5eccb38074104960d88df00805d0ebd7ecf839f9.zip
1 files changed, 36 insertions, 21 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 38639297..36d72651 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -624,30 +624,45 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
         # XXX: create URLs and rel for dataset ingest
         if not row.get('strategy'):
             return []
-        if row['strategy'].startswith('archiveorg') and row.get('archiveorg_item_name'):
-            return [
-                fatcat_openapi_client.FilesetUrl(
-                    url=f"https://archive.org/download/{row['archiveorg_item_name']}",
-                    rel="archive",
-                )
-            ]
-        elif row['strategy'].startswith('web') and row.get('web_base_url'):
-            return [
-                fatcat_openapi_client.FilesetUrl(
-                    url=f"https://web.archive.org/web/{row['web_base_url_dt']}/{row['web_base_url']}",
-                    rel="webarchive",
-                )
-            ]
-        elif row['strategy'] == 'web-file-bundle' and row.get('web_bundle_url'):
-            return [
-                fatcat_openapi_client.FilesetUrl(
-                    url=f"https://web.archive.org/web/{row['web_bundle_url_dt']}/{row['web_bundle_url']}",
-                    rel="webarchive",
-                )
-            ]
+        urls = []
+        if row['strategy'] == 'archiveorg-fileset' and row.get('archiveorg_item_name'):
+            urls.append(fatcat_openapi_client.FilesetUrl(
+                url=f"https://archive.org/download/{row['archiveorg_item_name']}/",
+                rel="archive",
+            ))
+        elif row['strategy'] == 'archiveorg-file-bundle' and row.get('archiveorg_item_name'):
+            # XXX: what is the filename of bundle?
+            urls.append(fatcat_openapi_client.FilesetUrl(
+                url=f"https://archive.org/download/{row['archiveorg_item_name']}/",
+                rel="archive",
+            ))
+        elif row['strategy'].startswith('web') and row.get('platform_base_url'):
+            urls.append(fatcat_openapi_client.FilesetUrl(
+                url=f"https://web.archive.org/web/{row['web_base_url_dt']}/{row['web_base_url']}",
+                rel="webarchive",
+            ))
+        elif row['strategy'] == 'web-file-bundle' and row.get('platform_bundle_url'):
+            urls.append(fatcat_openapi_client.FilesetUrl(
+                url=f"https://web.archive.org/web/{row['web_bundle_url_dt']}/{row['web_bundle_url']}",
+                rel="webarchive",
+            ))
         else:
+            # if no archival URLs, bail out
             return []
 
+        # add any additional / platform URLs here
+        if row.get('platform_bundle_url'):
+            urls.append(fatcat_openapi_client.FilesetUrl(
+                url=row['platform_bundle_url'],
+                rel="repository-bundle",
+            ))
+        if row.get('platform_base_url'):
+            urls.append(fatcat_openapi_client.FilesetUrl(
+                url=row['platform_base_url'],
+                rel="repository",
+            ))
+        return urls
+
     def parse_record(self, row):
 
         request = row['request']
author	Bryan Newbold <bnewbold@robocracy.org>	2021-10-12 14:44:44 -0700
committer	Bryan Newbold <bnewbold@robocracy.org>	2021-10-14 18:11:12 -0700
commit	5eccb38074104960d88df00805d0ebd7ecf839f9 (patch)
tree	a11b298e49e73686a5afc7bc7b34e1d0b0a86a79 /python/fatcat_tools
parent	75baf7d423a2cb119bd485672a00fd664e32537c (diff)
download	fatcat-5eccb38074104960d88df00805d0ebd7ecf839f9.tar.gz fatcat-5eccb38074104960d88df00805d0ebd7ecf839f9.zip