aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/html_metadata.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/html_metadata.py')
-rw-r--r-- python/sandcrawler/html_metadata.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index 6b1bdef..d3ca1b7 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -14,6 +14,8 @@ import braveblock
# - google scholar crawling notes (https://scholar.google.com/intl/ja/scholar/inclusion.html#indexing)
# - inspection of actual publisher HTML
# - http://div.div1.com.au/div-thoughts/div-commentaries/66-div-commentary-metadata
+# - "HTML meta tags used by journal articles"
+# https://gist.github.com/hubgit/5985963
# order of these are mostly by preference/quality (best option first), though
# also/sometimes re-ordered for lookup efficiency (lookup stops after first
# match)