improve documentation of browser2

This commit is contained in:
Romain Bignon 2014-04-12 15:00:03 +02:00
commit 07f6507096
4 changed files with 115 additions and 45 deletions

View file

@ -120,10 +120,25 @@ class BaseBrowser(object):
""" """
PROFILE = Firefox() PROFILE = Firefox()
"""
Default profile used by browser to navigate on websites.
"""
TIMEOUT = 10.0 TIMEOUT = 10.0
"""
Default timeout during requests.
"""
REFRESH_MAX = 0.0 REFRESH_MAX = 0.0
"""
When handling a Refresh header, the browser considers it only if the sleep
time is less than this value.
"""
VERIFY = True VERIFY = True
"""
Check SSL certificates.
"""
PROXIES = None PROXIES = None
@ -222,7 +237,7 @@ class BaseBrowser(object):
def location(self, url, **kwargs): def location(self, url, **kwargs):
""" """
Like open() but also changes the current URL and response. Like :meth:`open` but also changes the current URL and response.
This is the most common method to request web pages. This is the most common method to request web pages.
Other than that, has the exact same behavior of open(). Other than that, has the exact same behavior of open().
@ -393,7 +408,10 @@ class BaseBrowser(object):
class UrlNotAllowed(Exception): class UrlNotAllowed(Exception):
pass """
Raised by :class:`DomainBrowser` when `RESTRICT_URL` is set and trying to go
on an url not matching `BASEURL`.
"""
class DomainBrowser(BaseBrowser): class DomainBrowser(BaseBrowser):
@ -410,6 +428,7 @@ class DomainBrowser(BaseBrowser):
See absurl(). See absurl().
""" """
RESTRICT_URL = False
""" """
URLs allowed to load. URLs allowed to load.
This can be used to force SSL (if the BASEURL is SSL) or any other leakage. This can be used to force SSL (if the BASEURL is SSL) or any other leakage.
@ -417,7 +436,6 @@ class DomainBrowser(BaseBrowser):
Set it to a list of allowed URLs if you have multiple allowed URLs. Set it to a list of allowed URLs if you have multiple allowed URLs.
More complex behavior is possible by overloading url_allowed() More complex behavior is possible by overloading url_allowed()
""" """
RESTRICT_URL = False
def url_allowed(self, url): def url_allowed(self, url):
""" """
@ -458,6 +476,10 @@ class DomainBrowser(BaseBrowser):
return urljoin(base, uri) return urljoin(base, uri)
def open(self, req, *args, **kwargs): def open(self, req, *args, **kwargs):
"""
Like :meth:`BaseBrowser.open` but handles urls without domains, using
the :attr:`BASEURL` attribute.
"""
uri = req.url if isinstance(req, requests.Request) else req uri = req.url if isinstance(req, requests.Request) else req
url = self.absurl(uri) url = self.absurl(uri)

View file

@ -133,17 +133,18 @@ class TableCell(_Filter):
For example: For example:
class table(TableElement): >>> from weboob.capabilities.bank import Transaction
head_xpath = '//table/thead/th' >>> from .page import TableElement, ItemElement
item_xpath = '//table/tbody/tr' >>> class table(TableElement):
... head_xpath = '//table/thead/th'
col_date = u'Date' ... item_xpath = '//table/tbody/tr'
col_label = [u'Name', u'Label'] ... col_date = u'Date'
... col_label = [u'Name', u'Label']
class item(ItemElement): ... class item(ItemElement):
klass = Object ... klass = Transaction
obj_date = Date(TableCell('date')) ... obj_date = Date(TableCell('date'))
obj_label = CleanText(TableCell('label')) ... obj_label = CleanText(TableCell('label'))
...
""" """
def __init__(self, *names, **kwargs): def __init__(self, *names, **kwargs):

View file

@ -39,11 +39,15 @@ from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
class UrlNotResolvable(Exception): class UrlNotResolvable(Exception):
pass """
Raised when trying to locate on a URL instance whose url pattern is not resolvable as a real url.
"""
class DataError(Exception): class DataError(Exception):
pass """
Returned data from pages are incoherent.
"""
class URL(object): class URL(object):
@ -128,6 +132,12 @@ class URL(object):
return r.page or r return r.page or r
def build(self, **kwargs): def build(self, **kwargs):
"""
Build an url with the given arguments from URL's regexps.
:rtype: :class:`str`
:raises: :class:`UrlNotResolvable` if unable to resolve a correct url with the given arguments.
"""
patterns = [] patterns = []
for url in self.urls: for url in self.urls:
patterns += normalize(url) patterns += normalize(url)
@ -142,6 +152,9 @@ class URL(object):
raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns]))) raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns])))
def match(self, url, base=None): def match(self, url, base=None):
"""
Check if the given url matches this object.
"""
if base is None: if base is None:
assert self.browser is not None assert self.browser is not None
base = self.browser.BASEURL base = self.browser.BASEURL
@ -165,6 +178,9 @@ class URL(object):
return self.klass(self.browser, response, m.groupdict()) return self.klass(self.browser, response, m.groupdict())
def id2url(self, func): def id2url(self, func):
r"""
Helper decorator to get an URL if the given first parameter is an ID.
"""
def inner(browser, id_or_url, *args, **kwargs): def inner(browser, id_or_url, *args, **kwargs):
if re.match('^https?://.*', id_or_url): if re.match('^https?://.*', id_or_url):
if not self.match(id_or_url, browser.BASEURL): if not self.match(id_or_url, browser.BASEURL):
@ -203,11 +219,17 @@ class PagesBrowser(DomainBrowser):
Example: Example:
class MyBrowser(PagesBrowser): >>> class HomePage(BasePage):
BASEURL = 'http://example.org' ... pass
...
home = URL('/(index\.html)?', HomePage) >>> class ListPage(BasePage):
list = URL('/list\.html', ListPage) ... pass
...
>>> class MyBrowser(PagesBrowser):
... BASEURL = 'http://example.org'
... home = URL('/(index\.html)?', HomePage)
... list = URL('/list\.html', ListPage)
...
You can then use URL instances to go on pages. You can then use URL instances to go on pages.
""" """
@ -232,6 +254,12 @@ class PagesBrowser(DomainBrowser):
url.browser = self url.browser = self
def open(self, *args, **kwargs): def open(self, *args, **kwargs):
"""
Same method as
:meth:`weboob.tools.browser2.browser.DomainBrowser.open`, but the
response contains an attribute `page` if the url matches any
:class:`URL` object.
"""
response = super(PagesBrowser, self).open(*args, **kwargs) response = super(PagesBrowser, self).open(*args, **kwargs)
response.page = None response.page = None
@ -248,6 +276,12 @@ class PagesBrowser(DomainBrowser):
return response return response
def location(self, *args, **kwargs): def location(self, *args, **kwargs):
"""
Same method as
:meth:`weboob.tools.browser2.browser.BaseBrowser.location`, but if the
url matches any :class:`URL` object, an attribute `page` is added to
response, and the attribute :attr:`PagesBrowser.page` is set.
"""
if self.page is not None: if self.page is not None:
# Call leave hook. # Call leave hook.
self.page.on_leave() self.page.on_leave()
@ -269,10 +303,10 @@ class PagesBrowser(DomainBrowser):
r""" r"""
This helper function can be used to handle pagination pages easily. This helper function can be used to handle pagination pages easily.
When the called function raises an exception `NextPage`, it goes on the When the called function raises an exception :class:`NextPage`, it goes
wanted page and recall the function. on the wanted page and recall the function.
NextPage constructor can take an url or a Request object. :class:`NextPage` constructor can take an url or a Request object.
>>> class Page(HTMLPage): >>> class Page(HTMLPage):
... def iter_values(self): ... def iter_values(self):
@ -303,10 +337,10 @@ def pagination(func):
r""" r"""
This helper decorator can be used to handle pagination pages easily. This helper decorator can be used to handle pagination pages easily.
When the called function raises an exception `NextPage`, it goes on the When the called function raises an exception :class:`NextPage`, it goes on
wanted page and recall the function. the wanted page and recall the function.
NextPage constructor can take an url or a Request object. :class:`NextPage` constructor can take an url or a Request object.
>>> class Page(HTMLPage): >>> class Page(HTMLPage):
... @pagination ... @pagination
@ -325,8 +359,7 @@ def pagination(func):
>>> list(b.page.iter_values()) >>> list(b.page.iter_values())
['One', 'Two', 'Three', 'Four'] ['One', 'Two', 'Three', 'Four']
""" """
def inner(self, *args, **kwargs): def inner(page, *args, **kwargs):
page = self
while 1: while 1:
try: try:
for r in func(page, *args, **kwargs): for r in func(page, *args, **kwargs):
@ -344,7 +377,7 @@ class NextPage(Exception):
Exception used for example in a BasePage to tell PagesBrowser.pagination to Exception used for example in a BasePage to tell PagesBrowser.pagination to
go on the next page. go on the next page.
See PagesBrowser.pagination. See :meth:`PagesBrowser.pagination` or decorator :func:`pagination`.
""" """
def __init__(self, request): def __init__(self, request):
super(NextPage, self).__init__() super(NextPage, self).__init__()
@ -395,13 +428,19 @@ class BasePage(object):
self.params = params self.params = params
def on_load(self): def on_load(self):
pass """
Event called when browser loads this page.
"""
def on_leave(self): def on_leave(self):
pass """
Event called when browser leaves this page.
"""
class FormNotFound(Exception): class FormNotFound(Exception):
pass """
Raised when :meth:`HTMLPage.get_form` can't find a form.
"""
class Form(OrderedDict): class Form(OrderedDict):
""" """
@ -489,13 +528,19 @@ class HTMLPage(BasePage):
parser = html.HTMLParser(encoding=response.encoding) parser = html.HTMLParser(encoding=response.encoding)
self.doc = html.parse(StringIO(response.content), parser) self.doc = html.parse(StringIO(response.content), parser)
def get_form(self, xpath=None, name=None, nr=None): def get_form(self, xpath='//form', name=None, nr=None):
""" """
Get a Form object from a xpath selector. Get a :class:`Form` object from a selector.
"""
if xpath is None:
xpath = '//form'
:param xpath: xpath string to select forms
:type xpath: :class:`str`
:param name: if supplied, select a form with the given name
:type name: :class:`str`
:param nr: if supplied, take the n-th selected form
:type nr: :class:`int`
:rtype: :class:`Form`
:raises: :class:`FormNotFound` if no form is found
"""
i = 0 i = 0
for el in self.doc.xpath(xpath): for el in self.doc.xpath(xpath):
if name is not None and el.attrib.get('name', '') != name: if name is not None and el.attrib.get('name', '') != name:
@ -617,12 +662,14 @@ class ListElement(AbstractElement):
class SkipItem(Exception): class SkipItem(Exception):
pass """
Raise this exception in an :class:`ItemElement` subclass to skip an item.
"""
class _ItemElementMeta(type): class _ItemElementMeta(type):
""" """
Private meta-class used to keep order of obj_* attributes in ItemElement. Private meta-class used to keep order of obj_* attributes in :class:`ItemElement`.
""" """
def __new__(mcs, name, bases, attrs): def __new__(mcs, name, bases, attrs):
_attrs = [] _attrs = []

View file

@ -102,11 +102,11 @@ class FrenchTransaction(Transaction):
PATTERN class attribute) with a list containing tuples of regexp PATTERN class attribute) with a list containing tuples of regexp
and the associated type, for example:: and the associated type, for example::
PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER), >>> PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
(re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER), ... (re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'), ... (re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
FrenchTransaction.TYPE_CARD) ... FrenchTransaction.TYPE_CARD)
] ... ]
In regexps, you can define this patterns: In regexps, you can define this patterns: