import re import feedparser import urllib import sys from datetime import * from django.core.management.base import BaseCommand, CommandError from django.utils.html import strip_tags from feeds.models import * # see Command definition at the end # this mechanism isn't robust yet b/c any small sets get parsed repeatedly MIN_SET_SIZE = 3 # Need to find at least this many images for each set ############################################################################### def scrape_pics_from_html(pset, html): if(type(pset) != PicSet): raise("Type error, expected PicSet") # assumptions: one per line, ordering, etc mset = re.finditer('\'([^\.]+)\'...\n") return for shortname in args: try: scrape_feed(shortname) except Exception: sys.stderr.write("Error scraping " + shortname + "\n") sys.stdout.flush() sys.stdout.write('Done scraping feeds.\n')