From a854d722093216cfca7c7e949601e4432b6edf24 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 12 Jul 2010 03:20:44 +0200 Subject: [PATCH] use ExpectedElementNotFound and pages group_dict --- weboob/backends/youporn/browser.py | 4 ++-- weboob/backends/youporn/pages/video.py | 23 ++++++++++------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/weboob/backends/youporn/browser.py b/weboob/backends/youporn/browser.py index 0b292eb1..89e0568b 100644 --- a/weboob/backends/youporn/browser.py +++ b/weboob/backends/youporn/browser.py @@ -35,8 +35,8 @@ class YoupornBrowser(BaseBrowser): PROTOCOL = 'http' PAGES = {'http://[w\.]*youporn\.com/?': IndexPage, 'http://[w\.]*youporn\.com/search.*': IndexPage, - 'http://[w\.]*youporn\.com/watch/.+': VideoPage, - 'http://[w\.]*youporngay\.com:80/watch/.+': VideoPage, + 'http://[w\.]*youporn\.com/watch/(?P<id>.+)': VideoPage, + 'http://[w\.]*youporngay\.com:80/watch/(?P<id>.+)': VideoPage, } @id2url(YoupornVideo.id2url) diff --git a/weboob/backends/youporn/pages/video.py b/weboob/backends/youporn/pages/video.py index 6e81600d..1f651340 100644 --- a/weboob/backends/youporn/pages/video.py +++ b/weboob/backends/youporn/pages/video.py @@ -20,37 +20,34 @@ import re import datetime from logging import warning +from weboob.tools.browser import ExpectedElementNotFound + from .base import PornPage from ..video import YoupornVideo -class VideoPage(PornPage): - URL_REGEXP = re.compile("https?://[w\.]*youporn.com/watch/(\d+)/?.*") +class VideoPage(PornPage): def on_loaded(self): if not PornPage.on_loaded(self): return - self.video = YoupornVideo(self.get_id(), + self.video = YoupornVideo(self.group_dict['id'], self.get_title(), self.get_url(), ) self.set_details(self.video) - def get_id(self): - m = self.URL_REGEXP.match(self.url) - if m: - return int(m.group(1)) - warning("Unable to parse ID") - return 0 - def get_url(self): el = self.document.getroot().cssselect('div[id=download]') if el: return 
el[0].cssselect('a')[0].attrib['href'] def get_title(self): - el = self.document.getroot().cssselect('#videoArea h1') - if el: - return unicode(el[0].getchildren()[0].tail).strip() + selector = '#videoArea h1' + try: + element = self.document.getroot().cssselect(selector)[0] + except IndexError: + raise ExpectedElementNotFound(selector) + return unicode(element.getchildren()[0].tail).strip() DATE_REGEXP = re.compile("\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)") MONTH2I = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']