aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/test_html_ingest.py
blob: 943e5da68ec436a49b2912aba097f11f517f0f8b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

import datetime

import pytest

from sandcrawler.ingest_html import *


def test_html_extract_ojs3() -> None:
    """Check that TEI-XML body extraction reports success on the OJS3 fixture.

    Reads ``tests/files/first_monday_ojs3_fulltext.html`` as raw bytes, passes
    the bytes to ``html_extract_body_teixml``, and asserts that the returned
    mapping's ``'status'`` entry equals ``'success'``.
    """
    # Relative path: resolved against the current working directory.
    fixture_path = 'tests/files/first_monday_ojs3_fulltext.html'
    with open(fixture_path, 'rb') as fixture:
        raw_page = fixture.read()

    result = html_extract_body_teixml(raw_page)
    status = result['status']
    assert status == 'success'