take acidcow RSS descriptions

author: bnewbold <bnewbold@robocracy.org> 2011-04-27 21:01:49 -0400
committer: bnewbold <bnewbold@robocracy.org> 2011-04-27 21:01:49 -0400
commit: 6051dcfe6715473006ff1dad5d1acd17b8d5a6a9 (patch)
tree: cbbf37c5ffc12a2b1add66789d4e78b70e910556
parent: 481459f41a30c6708c4b18e8200a4d5bcf9cd1e6 (diff)
download: piccast-6051dcfe6715473006ff1dad5d1acd17b8d5a6a9.tar.gz piccast-6051dcfe6715473006ff1dad5d1acd17b8d5a6a9.zip
1 file changed, 6 insertions, 1 deletion
diff --git a/piccast/feeds/management/commands/scrape_feeds.py b/piccast/feeds/management/commands/scrape_feeds.py
index 957c596..6a85aef 100644
--- a/piccast/feeds/management/commands/scrape_feeds.py
+++ b/piccast/feeds/management/commands/scrape_feeds.py
@@ -5,6 +5,7 @@ import sys
 from datetime import *
 
 from django.core.management.base import BaseCommand, CommandError
+from django.utils.html import strip_tags
 from feeds.models import *
 
 # see Command definition at the end
@@ -44,7 +45,8 @@ def scrape_pics_from_html(pset, html):
     if(len(pics) < MIN_SET_SIZE):
         print "Didn't find enough pictures to save this set (found " + \
             str(len(pics)) + ", MIN_SET_SIZE=" + str(MIN_SET_SIZE) + ")"
-        pset.delete()
+        pset.is_valid = False
+        pset.save()
         return
 
     # TODO: oh boy, serial, this is a horrible way to do it! 
@@ -168,6 +170,9 @@ def scrape_feed(feed_shortname):
 
         # Ok, this is where we split out and do custom, per-site processing
         if(feed_shortname == u"acidcow"):
+            p.description = strip_tags(pset['description'])
+            if(p.description.startswith(u"Simlar posts:")):
+                p.description = None
             p.save()
             print "Great, saved: " + p.title + " (id=" + str(p.id) + ")"
             scrape_pics_acidcow(p)
author	bnewbold <bnewbold@robocracy.org>	2011-04-27 21:01:49 -0400
committer	bnewbold <bnewbold@robocracy.org>	2011-04-27 21:01:49 -0400
commit	6051dcfe6715473006ff1dad5d1acd17b8d5a6a9 (patch)
tree	cbbf37c5ffc12a2b1add66789d4e78b70e910556
parent	481459f41a30c6708c4b18e8200a4d5bcf9cd1e6 (diff)
download	piccast-6051dcfe6715473006ff1dad5d1acd17b8d5a6a9.tar.gz piccast-6051dcfe6715473006ff1dad5d1acd17b8d5a6a9.zip