def strip_tags(text, valid_tags=None):
    """Strip HTML from *text*, keeping only whitelisted tags and attributes.

    HTML comments are removed. Every tag whose name is a key of
    ``valid_tags`` is kept, but only with the attributes listed for that
    tag; any ``javascript:`` scheme text is removed from the surviving
    attribute values. Every other tag is hidden, so its markup is dropped
    while its contents are still rendered.

    Args:
        text: The HTML markup to clean.
        valid_tags: Mapping of allowed tag name -> container of allowed
            attribute names for that tag. Defaults to an empty mapping,
            which means no tag is kept.

    Returns:
        The cleaned markup, decoded from UTF-8.

    NOTE(review): the ``javascript:`` scrubbing is best-effort. It does not
    handle schemes obfuscated with embedded whitespace or character
    entities, nor other dangerous schemes; do not treat this as a complete
    XSS defence.
    """
    import re

    from BeautifulSoup import BeautifulSoup, Comment

    if valid_tags is None:
        valid_tags = {}

    js_scheme = re.compile(r'javascript:', re.IGNORECASE)

    def _scrub(value):
        # A single removal pass can be defeated by nesting the scheme inside
        # itself ('jajavascript:vascript:' collapses to 'javascript:'), so
        # keep substituting until the value stops changing.
        previous = None
        while previous != value:
            previous = value
            value = js_scheme.sub('', value)
        return value

    soup = BeautifulSoup(text)

    # Drop HTML comments entirely.
    for comment in soup.findAll(text=lambda node: isinstance(node, Comment)):
        comment.extract()

    for tag in soup.findAll(True):
        if tag.name in valid_tags:
            valid_attrs = valid_tags[tag.name]
            tag.attrs = [
                (attr, _scrub(val))
                for attr, val in tag.attrs
                if attr in valid_attrs
            ]
        else:
            # Hidden tags are not rendered themselves; their children are.
            tag.hidden = True

    return soup.renderContents().decode('utf8')