Use ExpectedElementNotFound and the page's group_dict

This commit is contained in:
Christophe Benz 2010-07-12 03:20:44 +02:00
commit a854d72209
2 changed files with 12 additions and 15 deletions

View file

@ -35,8 +35,8 @@ class YoupornBrowser(BaseBrowser):
PROTOCOL = 'http' PROTOCOL = 'http'
PAGES = {'http://[w\.]*youporn\.com/?': IndexPage, PAGES = {'http://[w\.]*youporn\.com/?': IndexPage,
'http://[w\.]*youporn\.com/search.*': IndexPage, 'http://[w\.]*youporn\.com/search.*': IndexPage,
'http://[w\.]*youporn\.com/watch/.+': VideoPage, 'http://[w\.]*youporn\.com/watch/(?P<id>.+)': VideoPage,
'http://[w\.]*youporngay\.com:80/watch/.+': VideoPage, 'http://[w\.]*youporngay\.com:80/watch/(?P<id>.+)': VideoPage,
} }
@id2url(YoupornVideo.id2url) @id2url(YoupornVideo.id2url)

View file

@ -20,37 +20,34 @@ import re
import datetime import datetime
from logging import warning from logging import warning
from weboob.tools.browser import ExpectedElementNotFound
from .base import PornPage from .base import PornPage
from ..video import YoupornVideo from ..video import YoupornVideo
class VideoPage(PornPage):
URL_REGEXP = re.compile("https?://[w\.]*youporn.com/watch/(\d+)/?.*")
class VideoPage(PornPage):
def on_loaded(self): def on_loaded(self):
if not PornPage.on_loaded(self): if not PornPage.on_loaded(self):
return return
self.video = YoupornVideo(self.get_id(), self.video = YoupornVideo(self.group_dict['id'],
self.get_title(), self.get_title(),
self.get_url(), self.get_url(),
) )
self.set_details(self.video) self.set_details(self.video)
def get_id(self):
m = self.URL_REGEXP.match(self.url)
if m:
return int(m.group(1))
warning("Unable to parse ID")
return 0
def get_url(self): def get_url(self):
el = self.document.getroot().cssselect('div[id=download]') el = self.document.getroot().cssselect('div[id=download]')
if el: if el:
return el[0].cssselect('a')[0].attrib['href'] return el[0].cssselect('a')[0].attrib['href']
def get_title(self): def get_title(self):
el = self.document.getroot().cssselect('#videoArea h1') selector = '#videoArea h1'
if el: try:
return unicode(el[0].getchildren()[0].tail).strip() element = self.document.getroot().cssselect(selector)[0]
except IndexError:
raise ExpectedElementNotFound(selector)
return unicode(element.getchildren()[0].tail).strip()
DATE_REGEXP = re.compile("\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)") DATE_REGEXP = re.compile("\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)")
MONTH2I = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] MONTH2I = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']