new select() helper

This commit is contained in:
Christophe Benz 2010-07-14 02:27:40 +02:00 committed by Romain Bignon
commit b4c672fa46
6 changed files with 67 additions and 51 deletions

View file

@ -21,7 +21,48 @@ import lxml.html
from .iparser import IParser
# Public API of this module: the parser class plus the select() helper and
# the exception it raises.
__all__ = ['LxmlHtmlParser', 'select', 'SelectElementException']
class SelectElementException(Exception):
    """Raised by select() when the selector does not match the expected elements."""
def select(element, selector, nb=None, method='cssselect'):
    """
    Select one or many elements from an element, using lxml cssselect by default.

    Raises SelectElementException if not found.

    @param element [obj] element on which to apply selector
    @param selector [str] CSS or XPath expression
    @param nb [int] number of elements expected to be found.
                    Use None for undefined number, and 'many' for 1 to infinite.
                    A positive integer is a minimum: more matches than nb are accepted.
    @param method [str] (cssselect|xpath); only 'cssselect' is implemented
    @return the raw result of cssselect() when nb is None (possibly empty),
            the first matched Element when nb == 1,
            the full list of matched Elements otherwise
    @raise SelectElementException when fewer elements than requested are found
    @raise NotImplementedError when method is not 'cssselect'
    @raise Exception when nb is neither None, 'many' nor a positive integer
    """
    if method != 'cssselect':
        raise NotImplementedError('Only cssselect method is implemented for the moment')

    results = element.cssselect(selector)

    # No expectation on the number of matches: hand back whatever was found.
    if nb is None:
        return results

    # 'many' means "at least one", as documented; a single match is valid.
    # Plain equality is enough here: a non-string nb simply compares unequal.
    if nb == 'many':
        if not results:
            raise SelectElementException('Element not found with selector "%s"' % selector)
        return results

    if isinstance(nb, int) and nb > 0:
        if results is None:
            raise SelectElementException('Element not found with selector "%s"' % selector)
        if len(results) < nb:
            raise SelectElementException(
                'Not enough elements found (%d expected) with selector "%s"' % (nb, selector))
        # Asking for exactly one element yields the element itself, not a list.
        return results[0] if nb == 1 else results

    # Wrap nb in a tuple so that a tuple value is formatted instead of being
    # unpacked as several format arguments.
    raise Exception('Unhandled value for kwarg "nb": %s' % (nb,))
class LxmlHtmlParser(IParser):
@ -32,7 +73,10 @@ class LxmlHtmlParser(IParser):
"""
def parse(self, data, encoding=None):
    """
    Parse an HTML document with lxml.

    @param data      source handed unchanged to lxml.html.parse
    @param encoding  [str] encoding forced on the HTML parser; with None no
                     parser is passed and lxml.html.parse picks its own
    @return the value returned by lxml.html.parse
    """
    # Only build a dedicated HTMLParser when an encoding is explicitly
    # requested; otherwise pass None so lxml falls back to its default parser.
    if encoding is None:
        parser = None
    else:
        parser = lxml.html.HTMLParser(encoding=encoding)
    return lxml.html.parse(data, parser)
def tostring(self, element):