diff options
| author | bnewbold <bnewbold@robocracy.org> | 2011-04-27 21:01:49 -0400 | 
|---|---|---|
| committer | bnewbold <bnewbold@robocracy.org> | 2011-04-27 21:01:49 -0400 | 
| commit | 6051dcfe6715473006ff1dad5d1acd17b8d5a6a9 (patch) | |
| tree | cbbf37c5ffc12a2b1add66789d4e78b70e910556 | |
| parent | 481459f41a30c6708c4b18e8200a4d5bcf9cd1e6 (diff) | |
| download | piccast-6051dcfe6715473006ff1dad5d1acd17b8d5a6a9.tar.gz piccast-6051dcfe6715473006ff1dad5d1acd17b8d5a6a9.zip | |
take acidcow RSS descriptions
| -rw-r--r-- | piccast/feeds/management/commands/scrape_feeds.py | 7 | 
1 files changed, 6 insertions, 1 deletions
| diff --git a/piccast/feeds/management/commands/scrape_feeds.py b/piccast/feeds/management/commands/scrape_feeds.py index 957c596..6a85aef 100644 --- a/piccast/feeds/management/commands/scrape_feeds.py +++ b/piccast/feeds/management/commands/scrape_feeds.py @@ -5,6 +5,7 @@ import sys  from datetime import *  from django.core.management.base import BaseCommand, CommandError +from django.utils.html import strip_tags  from feeds.models import *  # see Command definition at the end @@ -44,7 +45,8 @@ def scrape_pics_from_html(pset, html):      if(len(pics) < MIN_SET_SIZE):          print "Didn't find enough pictures to save this set (found " + \              str(len(pics)) + ", MIN_SET_SIZE=" + str(MIN_SET_SIZE) + ")" -        pset.delete() +        pset.is_valid = False +        pset.save()          return      # TODO: oh boy, serial, this is a horrible way to do it!  @@ -168,6 +170,9 @@ def scrape_feed(feed_shortname):          # Ok, this is where we split out and do custom, per-site processing          if(feed_shortname == u"acidcow"): +            p.description = strip_tags(pset['description']) +            if(p.description.starts_with(u"Simlar posts:"): +                p.description = None              p.save()              print "Great, saved: " + p.title + " (id=" + str(p.id) + ")"              scrape_pics_acidcow(p) | 
