diff options
-rw-r--r-- | piccast/feeds/management/commands/scrape_feeds.py | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/piccast/feeds/management/commands/scrape_feeds.py b/piccast/feeds/management/commands/scrape_feeds.py index 957c596..6a85aef 100644 --- a/piccast/feeds/management/commands/scrape_feeds.py +++ b/piccast/feeds/management/commands/scrape_feeds.py @@ -5,6 +5,7 @@ import sys from datetime import * from django.core.management.base import BaseCommand, CommandError +from django.utils.html import strip_tags from feeds.models import * # see Command definition at the end @@ -44,7 +45,8 @@ def scrape_pics_from_html(pset, html): if(len(pics) < MIN_SET_SIZE): print "Didn't find enough pictures to save this set (found " + \ str(len(pics)) + ", MIN_SET_SIZE=" + str(MIN_SET_SIZE) + ")" - pset.delete() + pset.is_valid = False + pset.save() return # TODO: oh boy, serial, this is a horrible way to do it! @@ -168,6 +170,9 @@ def scrape_feed(feed_shortname): # Ok, this is where we split out and do custom, per-site processing if(feed_shortname == u"acidcow"): + p.description = strip_tags(pset['description']) + if(p.description.startswith(u"Simlar posts:")): + p.description = None p.save() print "Great, saved: " + p.title + " (id=" + str(p.id) + ")" scrape_pics_acidcow(p) |