diff options
Diffstat (limited to 'python/sandcrawler/html_metadata.py')
-rw-r--r-- | python/sandcrawler/html_metadata.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py index 6b1bdef..d3ca1b7 100644 --- a/python/sandcrawler/html_metadata.py +++ b/python/sandcrawler/html_metadata.py @@ -14,6 +14,8 @@ import braveblock # - google scholar crawling notes (https://scholar.google.com/intl/ja/scholar/inclusion.html#indexing) # - inspection of actual publisher HTML # - http://div.div1.com.au/div-thoughts/div-commentaries/66-div-commentary-metadata +# - "HTML meta tags used by journal articles" +# https://gist.github.com/hubgit/5985963 # order of these are mostly by preference/quality (best option first), though # also/sometimes re-ordered for lookup efficiency (lookup stops after first # match) |