diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-10-28 14:49:33 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-10-28 14:49:33 +0200 |
commit | 7433c503cfdd481ab420b08bed381b1c5162a7d1 (patch) | |
tree | ee95064fbf898682686b8d584bb6e00bfe7162f7 /python/refcat/tasks.py | |
parent | d2f14aa814f051e748f2702b48f43d6356e03a94 (diff) | |
download | refcat-7433c503cfdd481ab420b08bed381b1c5162a7d1.tar.gz refcat-7433c503cfdd481ab420b08bed381b1c5162a7d1.zip |
tasks: add missing import
Diffstat (limited to 'python/refcat/tasks.py')
-rw-r--r-- | python/refcat/tasks.py | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py index ebb5873..bbce44c 100644 --- a/python/refcat/tasks.py +++ b/python/refcat/tasks.py @@ -179,7 +179,9 @@ import os import sys import tempfile +import grobid_tei_xml import luigi +import requests from refcat.base import BaseTask, Zstd, shellout from refcat.settings import settings @@ -1530,8 +1532,10 @@ class BrefZipWikiDOI(Refcat): def output(self): return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd) + # Grobid reparse via grobid_tei_xml + class UnmatchedRefsReparse(Refcat): """ Reparse unmatched refs which have an unstructured field; about 190M/270M @@ -1568,6 +1572,7 @@ class UnmatchedRefsReparse(Refcat): def output(self): return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd) + # Wayback related, extract URL, query CDX. # # TODO: Make CDX lookup more generic, maybe a separate library or tool or mass |