Delete the global 'remove_html_tags' function and create the IParser.tocleanstring and IParser.strip abstract methods.

This commit is contained in:
Romain Bignon 2011-10-25 13:28:43 +02:00
commit 59dfe3083a
4 changed files with 31 additions and 7 deletions

View file

@ -24,7 +24,6 @@ from dateutil import tz
from logging import warning
from time import time, sleep
from tempfile import gettempdir
import re
import os
import sys
import traceback
@ -62,10 +61,6 @@ def get_bytes_size(size, unit_name):
}
return float(size * unit_data.get(unit_name, 1))
def remove_html_tags(data):
    """Replace every ``<...>`` tag-like span in *data* with a single space.

    This is a purely textual substitution, not an HTML parser: any run of
    characters starting at ``<`` and ending at the nearest following ``>``
    is treated as a tag.

    :param data: text that may contain HTML markup
    :returns: *data* with each tag-like span replaced by one space
    """
    # DOTALL lets '.' match newlines, so a tag whose attributes are wrapped
    # over several lines is removed as well instead of being left in place.
    tag_pattern = re.compile(r'<.*?>', re.DOTALL)
    return tag_pattern.sub(' ', data)
try:
import html2text as h2t
h2t.UNICODE_SNOB = 1