diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py index 31edebfe..317489b3 100644 --- a/weboob/tools/browser2/browser.py +++ b/weboob/tools/browser2/browser.py @@ -120,10 +120,25 @@ class BaseBrowser(object): """ PROFILE = Firefox() + """ + Default profile used by browser to navigate on websites. + """ + TIMEOUT = 10.0 + """ + Default timeout during requests. + """ + REFRESH_MAX = 0.0 + """ + When handling a Refresh header, the browser considers it only if the sleep + time is less than this value. + """ VERIFY = True + """ + Check SSL certificates. + """ PROXIES = None @@ -222,7 +237,7 @@ class BaseBrowser(object): def location(self, url, **kwargs): """ - Like open() but also changes the current URL and response. + Like :meth:`open` but also changes the current URL and response. This is the most common method to request web pages. Other than that, has the exact same behavior of open(). @@ -393,7 +408,10 @@ class BaseBrowser(object): class UrlNotAllowed(Exception): - pass + """ + Raised by :class:`DomainBrowser` when `RESTRICT_URL` is set and trying to go + on an url not matching `BASEURL`. + """ class DomainBrowser(BaseBrowser): @@ -410,6 +428,7 @@ class DomainBrowser(BaseBrowser): See absurl(). """ + RESTRICT_URL = False """ URLs allowed to load. This can be used to force SSL (if the BASEURL is SSL) or any other leakage. @@ -417,7 +436,6 @@ class DomainBrowser(BaseBrowser): Set it to a list of allowed URLs if you have multiple allowed URLs. More complex behavior is possible by overloading url_allowed() """ - RESTRICT_URL = False def url_allowed(self, url): """ @@ -458,6 +476,10 @@ class DomainBrowser(BaseBrowser): return urljoin(base, uri) def open(self, req, *args, **kwargs): + """ + Like :meth:`BaseBrowser.open` but handles urls without domains, using + the :attr:`BASEURL` attribute. 
+ """ uri = req.url if isinstance(req, requests.Request) else req url = self.absurl(uri) diff --git a/weboob/tools/browser2/filters.py b/weboob/tools/browser2/filters.py index fa6a2fa0..cdb834d0 100644 --- a/weboob/tools/browser2/filters.py +++ b/weboob/tools/browser2/filters.py @@ -133,17 +133,18 @@ class TableCell(_Filter): For example: - class table(TableElement): - head_xpath = '//table/thead/th' - item_xpath = '//table/tbody/tr' - - col_date = u'Date' - col_label = [u'Name', u'Label'] - - class item(ItemElement): - klass = Object - obj_date = Date(TableCell('date')) - obj_label = CleanText(TableCell('label')) + >>> from weboob.capabilities.bank import Transaction + >>> from .page import TableElement, ItemElement + >>> class table(TableElement): + ... head_xpath = '//table/thead/th' + ... item_xpath = '//table/tbody/tr' + ... col_date = u'Date' + ... col_label = [u'Name', u'Label'] + ... class item(ItemElement): + ... klass = Transaction + ... obj_date = Date(TableCell('date')) + ... obj_label = CleanText(TableCell('label')) + ... """ def __init__(self, *names, **kwargs): diff --git a/weboob/tools/browser2/page.py b/weboob/tools/browser2/page.py index a2346b68..c5fb3003 100644 --- a/weboob/tools/browser2/page.py +++ b/weboob/tools/browser2/page.py @@ -39,11 +39,15 @@ from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound class UrlNotResolvable(Exception): - pass + """ + Raised when trying to locate on an URL instance which url pattern is not resolvable as a real url. + """ class DataError(Exception): - pass + """ + Returned data from pages are incoherent. + """ class URL(object): @@ -128,6 +132,12 @@ class URL(object): return r.page or r def build(self, **kwargs): + """ + Build an url with the given arguments from URL's regexps. + + :rtype: :class:`str` + :raises: :class:`UrlNotResolvable` if unable to resolve a correct url with the given arguments. 
+ """ patterns = [] for url in self.urls: patterns += normalize(url) @@ -142,6 +152,9 @@ class URL(object): raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns]))) def match(self, url, base=None): + """ + Check if the given url matches this object. + """ if base is None: assert self.browser is not None base = self.browser.BASEURL @@ -165,6 +178,9 @@ class URL(object): return self.klass(self.browser, response, m.groupdict()) def id2url(self, func): + r""" + Helper decorator to get an URL if the given first parameter is an ID. + """ def inner(browser, id_or_url, *args, **kwargs): if re.match('^https?://.*', id_or_url): if not self.match(id_or_url, browser.BASEURL): @@ -203,11 +219,17 @@ class PagesBrowser(DomainBrowser): Example: - class MyBrowser(PagesBrowser): - BASEURL = 'http://example.org' - - home = URL('/(index\.html)?', HomePage) - list = URL('/list\.html', ListPage) + >>> class HomePage(BasePage): + ... pass + ... + >>> class ListPage(BasePage): + ... pass + ... + >>> class MyBrowser(PagesBrowser): + ... BASEURL = 'http://example.org' + ... home = URL('/(index\.html)?', HomePage) + ... list = URL('/list\.html', ListPage) + ... You can then use URL instances to go on pages. """ @@ -232,6 +254,12 @@ class PagesBrowser(DomainBrowser): url.browser = self def open(self, *args, **kwargs): + """ + Same method as + :meth:`weboob.tools.browser2.browser.DomainBrowser.open`, but the + response contains an attribute `page` if the url matches any + :class:`URL` object. + """ response = super(PagesBrowser, self).open(*args, **kwargs) response.page = None @@ -248,6 +276,12 @@ class PagesBrowser(DomainBrowser): return response def location(self, *args, **kwargs): + """ + Same method as + :meth:`weboob.tools.browser2.browser.BaseBrowser.location`, but if the + url matches any :class:`URL` object, an attribute `page` is added to + response, and the attribute :attr:`PagesBrowser.page` is set. 
+ """ if self.page is not None: # Call leave hook. self.page.on_leave() @@ -269,10 +303,10 @@ class PagesBrowser(DomainBrowser): r""" This helper function can be used to handle pagination pages easily. - When the called function raises an exception `NextPage`, it goes on the - wanted page and recall the function. + When the called function raises an exception :class:`NextPage`, it goes + on the wanted page and recall the function. - NextPage constructor can take an url or a Request object. + :class:`NextPage` constructor can take an url or a Request object. >>> class Page(HTMLPage): ... def iter_values(self): @@ -303,10 +337,10 @@ def pagination(func): r""" This helper decorator can be used to handle pagination pages easily. - When the called function raises an exception `NextPage`, it goes on the - wanted page and recall the function. + When the called function raises an exception :class:`NextPage`, it goes on + the wanted page and recall the function. - NextPage constructor can take an url or a Request object. + :class:`NextPage` constructor can take an url or a Request object. >>> class Page(HTMLPage): ... @pagination @@ -325,8 +359,7 @@ def pagination(func): >>> list(b.page.iter_values()) ['One', 'Two', 'Three', 'Four'] """ - def inner(self, *args, **kwargs): - page = self + def inner(page, *args, **kwargs): while 1: try: for r in func(page, *args, **kwargs): @@ -344,7 +377,7 @@ class NextPage(Exception): Exception used for example in a BasePage to tell PagesBrowser.pagination to go on the next page. - See PagesBrowser.pagination. + See :meth:`PagesBrowser.pagination` or decorator :func:`pagination`. """ def __init__(self, request): super(NextPage, self).__init__() @@ -395,13 +428,19 @@ class BasePage(object): self.params = params def on_load(self): - pass + """ + Event called when browser loads this page. + """ def on_leave(self): - pass + """ + Event called when browser leaves this page. 
+ """ class FormNotFound(Exception): - pass + """ + Raised when :meth:`HTMLPage.get_form` can't find a form. + """ class Form(OrderedDict): """ @@ -489,13 +528,19 @@ class HTMLPage(BasePage): parser = html.HTMLParser(encoding=response.encoding) self.doc = html.parse(StringIO(response.content), parser) - def get_form(self, xpath=None, name=None, nr=None): + def get_form(self, xpath='//form', name=None, nr=None): """ - Get a Form object from a xpath selector. - """ - if xpath is None: - xpath = '//form' + Get a :class:`Form` object from a selector. + :param xpath: xpath string to select forms + :type xpath: :class:`str` + :param name: if supplied, select a form with the given name + :type name: :class:`str` + :param nr: if supplied, take the n-th selected form + :type nr: :class:`int` + :rtype: :class:`Form` + :raises: :class:`FormNotFound` if no form is found + """ i = 0 for el in self.doc.xpath(xpath): if name is not None and el.attrib.get('name', '') != name: @@ -617,12 +662,14 @@ class ListElement(AbstractElement): class SkipItem(Exception): - pass + """ + Raise this exception in an :class:`ItemElement` subclass to skip an item. + """ class _ItemElementMeta(type): """ - Private meta-class used to keep order of obj_* attributes in ItemElement. + Private meta-class used to keep order of obj_* attributes in :class:`ItemElement`. """ def __new__(mcs, name, bases, attrs): _attrs = [] diff --git a/weboob/tools/capabilities/bank/transactions.py b/weboob/tools/capabilities/bank/transactions.py index 278dc35a..7d29f96d 100644 --- a/weboob/tools/capabilities/bank/transactions.py +++ b/weboob/tools/capabilities/bank/transactions.py @@ -102,11 +102,11 @@ class FrenchTransaction(Transaction): PATTERN class attribute) with a list containing tuples of regexp and the associated type, for example:: - PATTERNS = [(re.compile('^VIR(EMENT)? 
(?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER), - (re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER), - (re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>
\d{2})(?P<mm>\d{2}) ?(.*)$'), - FrenchTransaction.TYPE_CARD) - ] + >>> PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER), + ... (re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER), + ... (re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>
\d{2})(?P<mm>\d{2}) ?(.*)$'), + ... FrenchTransaction.TYPE_CARD) + ... ] In regexps, you can define this patterns: