wrap up previous renaming work

author: Bryan Newbold <bnewbold@archive.org> 2021-10-04 16:12:19 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:25 -0700
commit: 4b3d6cb79a7182be4976aab34db251ecbcbd2665 (patch)
tree: cfdd6a5223b38a288af8806e08410365022be8ea /python
parent: e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e (diff)
download: sandcrawler-4b3d6cb79a7182be4976aab34db251ecbcbd2665.tar.gz sandcrawler-4b3d6cb79a7182be4976aab34db251ecbcbd2665.zip
4 files changed, 4 insertions, 6 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index a02e923..305a5d1 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -17,7 +17,7 @@ from sandcrawler.grobid import GrobidClient
 from sandcrawler.pdfextract import process_pdf, PdfExtractResult
 from sandcrawler.misc import gen_file_metadata, clean_url, parse_cdx_datetime
 from sandcrawler.html import extract_fulltext_url
-from sandcrawler.html_ingest import fetch_html_resources, \
+from sandcrawler.ingest_html import fetch_html_resources, \
     quick_fetch_html_resources, html_guess_scope, html_extract_body_teixml, \
     WebResource, html_guess_platform
 from sandcrawler.html_metadata import BiblioMetadata, html_extract_resources, html_extract_biblio, load_adblock_rules
@@ -25,8 +25,6 @@ from sandcrawler.workers import SandcrawlerWorker
 from sandcrawler.db import SandcrawlerPostgrestClient
 from sandcrawler.xml import xml_reserialize
 
-from sandcrawler.platforms.generic import DirectFileHelper
-
 
 MAX_BODY_SIZE_BYTES = 128*1024*1024
 
diff --git a/python/sandcrawler/ingest_html.py b/python/sandcrawler/ingest_html.py
index f11cac4..56a726d 100644
--- a/python/sandcrawler/ingest_html.py
+++ b/python/sandcrawler/ingest_html.py
@@ -396,7 +396,7 @@ def main() -> None:
     """
     Run this command like:
 
-        python -m sandcrawler.html_ingest
+        python -m sandcrawler.ingest_html
     """
 
     parser = argparse.ArgumentParser(
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index a388b90..ee153ab 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -28,7 +28,7 @@ from sandcrawler.db import SandcrawlerPostgresClient
 from sandcrawler.minio import SandcrawlerMinioClient
 from sandcrawler.grobid import GrobidClient
 from sandcrawler.pdfextract import PdfExtractResult
-from sandcrawler.html_ingest import HtmlMetaRow
+from sandcrawler.ingest_html import HtmlMetaRow
 
 
 class PersistCdxWorker(SandcrawlerWorker):
diff --git a/python/tests/test_html_ingest.py b/python/tests/test_html_ingest.py
index e6e48ac..efd1ddf 100644
--- a/python/tests/test_html_ingest.py
+++ b/python/tests/test_html_ingest.py
@@ -2,7 +2,7 @@
 import datetime
 import pytest
 
-from sandcrawler.html_ingest import *
+from sandcrawler.ingest_html import *
 
 
 def test_html_extract_ojs3() -> None:
author	Bryan Newbold <bnewbold@archive.org>	2021-10-04 16:12:19 -0700
committer	Bryan Newbold <bnewbold@archive.org>	2021-10-15 18:15:25 -0700
commit	4b3d6cb79a7182be4976aab34db251ecbcbd2665 (patch)
tree	cfdd6a5223b38a288af8806e08410365022be8ea /python
parent	e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e (diff)
download	sandcrawler-4b3d6cb79a7182be4976aab34db251ecbcbd2665.tar.gz sandcrawler-4b3d6cb79a7182be4976aab34db251ecbcbd2665.zip