new select() helper
This commit is contained in:
parent
eb026b7c3c
commit
b4c672fa46
6 changed files with 67 additions and 51 deletions
|
|
@ -21,7 +21,48 @@ import lxml.html
|
|||
from .iparser import IParser
|
||||
|
||||
|
||||
# Public API of this module: the parser class plus the select() helper and
# the exception it raises.
__all__ = ['LxmlHtmlParser', 'select', 'SelectElementException']
|
||||
|
||||
|
||||
class SelectElementException(Exception):
    """Raised by select() when a selector matches too few elements."""
|
||||
|
||||
|
||||
def select(element, selector, nb=None, method='cssselect'):
    """
    Select one or many elements from an element, using lxml cssselect by default.

    @param element [obj]  element on which to apply selector; it must provide
                          a cssselect(selector) method
    @param selector [str]  CSS or XPath expression
    @param nb [int|str|None]  number of elements expected to be found:
                     - None: no check, the raw result of cssselect() is
                       returned (it may be empty);
                     - 'many': at least two elements must match;
                     - positive int: at least that many elements must match.
    @param method [str]  (cssselect|xpath); only 'cssselect' is implemented
    @return  the first matching Element when nb == 1, otherwise the list of
             matching elements
    @raise SelectElementException  if fewer elements than required are found
    @raise ValueError  if nb is not None, 'many' or a positive integer
    @raise NotImplementedError  if method is anything other than 'cssselect'
    """
    if method != 'cssselect':
        raise NotImplementedError('Only cssselect method is implemented for the moment')

    results = element.cssselect(selector)

    if nb is None:
        return results

    # Plain equality matches both byte and unicode strings, so no
    # Python 2-only basestring check is needed here.
    if nb == 'many':
        if results is None or len(results) == 0:
            raise SelectElementException('Element not found with selector "%s"' % selector)
        if len(results) == 1:
            raise SelectElementException('Only one element found with selector "%s"' % selector)
        return results

    if isinstance(nb, int) and nb > 0:
        if results is None:
            raise SelectElementException('Element not found with selector "%s"' % selector)
        if len(results) < nb:
            raise SelectElementException('Not enough elements found (%d expected) with selector "%s"' % (nb, selector))
        # Extra matches are tolerated; nb == 1 unwraps the first element.
        return results[0] if nb == 1 else results

    # Wrap nb in a 1-tuple so that a tuple value cannot break the formatting.
    raise ValueError('Unhandled value for kwarg "nb": %s' % (nb,))
|
||||
|
||||
|
||||
class LxmlHtmlParser(IParser):
|
||||
|
|
@ -32,7 +73,10 @@ class LxmlHtmlParser(IParser):
|
|||
"""
|
||||
|
||||
def parse(self, data, encoding=None):
    """
    Parse a document with lxml.html.

    @param data  source passed unchanged to lxml.html.parse
    @param encoding [str]  encoding to force on the parser, or None to
                           pass no parser to lxml.html.parse
    @return  the result of lxml.html.parse
    """
    # Build a dedicated HTMLParser only when an encoding is requested; the
    # parser is no longer constructed first and then thrown away.
    parser = None if encoding is None else lxml.html.HTMLParser(encoding=encoding)
    return lxml.html.parse(data, parser)
|
||||
|
||||
def tostring(self, element):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue