aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-10-28 14:49:33 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-10-28 14:49:33 +0200
commit 7433c503cfdd481ab420b08bed381b1c5162a7d1 (patch)
tree ee95064fbf898682686b8d584bb6e00bfe7162f7 /python
parent d2f14aa814f051e748f2702b48f43d6356e03a94 (diff)
download refcat-7433c503cfdd481ab420b08bed381b1c5162a7d1.tar.gz
refcat-7433c503cfdd481ab420b08bed381b1c5162a7d1.zip
tasks: add missing import
Diffstat (limited to 'python')
-rw-r--r-- python/refcat/report.py 3
-rw-r--r-- python/refcat/tasks.py 5
-rw-r--r-- python/setup.py 2
3 files changed, 8 insertions, 2 deletions
diff --git a/python/refcat/report.py b/python/refcat/report.py
index 4a0c219..aaa4544 100644
--- a/python/refcat/report.py
+++ b/python/refcat/report.py
@@ -55,6 +55,7 @@ class BrefDOIOnly(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="tsv.zst"), format=Zstd)
+
# TODO: DOAJ subset
#
# (1) find all release idents with doaj id
@@ -108,5 +109,3 @@ class BrefDOIOnly(Refcat):
# $ zstdcat -T0 doi_refs.tsv.zst| pv -l | wc -l
# 1.32G 0:06:33 [3.34M/s] [ <=> ]1315040677
# 1315040677
-
-
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index ebb5873..bbce44c 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -179,7 +179,9 @@ import os
import sys
import tempfile
+import grobid_tei_xml
import luigi
+import requests
from refcat.base import BaseTask, Zstd, shellout
from refcat.settings import settings
@@ -1530,8 +1532,10 @@ class BrefZipWikiDOI(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
# Grobid reparse via grobid_tei_xml
+
class UnmatchedRefsReparse(Refcat):
"""
Reparse unmatched refs which have an unstructured field; about 190M/270M
@@ -1568,6 +1572,7 @@ class UnmatchedRefsReparse(Refcat):
def output(self):
return luigi.LocalTarget(path=self.path(ext="json.zst"), format=Zstd)
+
# Wayback related, extract URL, query CDX.
#
# TODO: Make CDX lookup more generic, maybe a separate library or tool or mass
diff --git a/python/setup.py b/python/setup.py
index 587e269..6b3d57a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -26,7 +26,9 @@ with open("README.md", "r") as fh:
]},
install_requires=[
"dynaconf[ini]",
+ "grobid_tei_xml",
"luigi",
+ "requests",
],
extras_require={"dev": [
"ipython",