from ConfigParser import ConfigParser import urllib, re, sys from xml.sax.saxutils import unescape def clean(data): try: data.decode('utf-8') except: data = data.decode('iso-8859-1').encode('utf-8') return unescape(data) def ibm2config(html, config=None): if hasattr(html, 'read'): html = html.read() if not config: config = ConfigParser() urls = dict(re.findall('var url(\d+)="(.*?)";', html)) feeds = dict(re.findall('var feed(\d+)="(.*?)";', html)) name_re = re.compile("([^>]*?)