From 5f59c130b3cfa315dabcf56f3722694fe620e020 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Sat, 12 Apr 2014 12:48:14 +0200 Subject: [PATCH] add decorator pagination --- modules/youjizz/browser.py | 4 ++-- modules/youjizz/pages/index.py | 3 ++- weboob/tools/browser2/page.py | 40 ++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/modules/youjizz/browser.py b/modules/youjizz/browser.py index 04dd0f50..0d554330 100644 --- a/modules/youjizz/browser.py +++ b/modules/youjizz/browser.py @@ -47,10 +47,10 @@ class YoujizzBrowser(PagesBrowser): self.search.go(pattern=pattern, pagenum=1) assert self.search.is_here(pattern=pattern, pagenum=1) - return self.pagination(lambda: self.page.iter_videos()) + return self.page.iter_videos() def latest_videos(self): self.index.go() assert self.index.is_here() - return self.pagination(lambda: self.page.iter_videos()) + return self.page.iter_videos() diff --git a/modules/youjizz/pages/index.py b/modules/youjizz/pages/index.py index f04095b8..46c76f79 100644 --- a/modules/youjizz/pages/index.py +++ b/modules/youjizz/pages/index.py @@ -19,7 +19,7 @@ from weboob.tools.browser2 import HTMLPage -from weboob.tools.browser2.page import ListElement, method, ItemElement +from weboob.tools.browser2.page import ListElement, method, ItemElement, pagination from weboob.tools.browser2.filters import Link, CleanText, Duration, Regexp from weboob.capabilities.base import NotAvailable from weboob.capabilities.image import BaseImage @@ -30,6 +30,7 @@ __all__ = ['IndexPage'] class IndexPage(HTMLPage): + @pagination @method class iter_videos(ListElement): item_xpath = '//span[@id="miniatura"]' diff --git a/weboob/tools/browser2/page.py b/weboob/tools/browser2/page.py index cb286865..a2346b68 100644 --- a/weboob/tools/browser2/page.py +++ b/weboob/tools/browser2/page.py @@ -299,6 +299,46 @@ class PagesBrowser(DomainBrowser): else: return +def pagination(func): + r""" + This helper decorator can be used to 
handle pagination pages easily. + + When the called function raises an exception `NextPage`, it goes to the + wanted page and calls the function again. + + NextPage constructor can take a URL or a Request object. + + >>> class Page(HTMLPage): + ... @pagination + ... def iter_values(self): + ... for el in self.doc.xpath('//li'): + ... yield el.text + ... for next in self.doc.xpath('//a'): + ... raise NextPage(next.attrib['href']) + ... + >>> class Browser(PagesBrowser): + ... BASEURL = 'http://people.symlink.me' + ... list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page) + ... + >>> b = Browser() + >>> b.list.go(pagenum=1) + >>> list(b.page.iter_values()) + ['One', 'Two', 'Three', 'Four'] + """ + def inner(self, *args, **kwargs): + page = self + while 1: + try: + for r in func(page, *args, **kwargs): + yield r + except NextPage as e: + result = page.browser.location(e.request) + page = result.page + else: + return + + return inner + class NextPage(Exception): """ Exception used for example in a BasePage to tell PagesBrowser.pagination to