new select() helper

This commit is contained in:
Christophe Benz 2010-07-14 02:27:40 +02:00 committed by Romain Bignon
commit b4c672fa46
6 changed files with 67 additions and 51 deletions

View file

@ -18,7 +18,6 @@
import re
from weboob.tools.browser import ExpectedElementNotFound
from weboob.backends.aum.pages.base import PageBase
from logging import error
@ -47,4 +46,5 @@ class HomePage(PageBase):
i += 1
if i == 3:
return int(font.firstChild.data)
raise ExpectedElementNotFound(u'Could not parse number of charms available')
logging.warning(u'Could not parse number of charms available')
return 0

View file

@ -16,9 +16,11 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import datetime
import re
from weboob.tools.browser import BasePage, ExpectedElementNotFound
from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select
from ..video import YoujizzVideo
@ -28,31 +30,20 @@ __all__ = ['IndexPage']
class IndexPage(BasePage):
def iter_videos(self):
div_id = 'span#miniatura'
span_list = self.document.getroot().cssselect(div_id)
if not span_list:
raise ExpectedElementNotFound(div_id)
span_list = select(self.document.getroot(), 'span#miniatura')
for span in span_list:
a = span.find('.//a')
if a is None:
raise ExpectedElementNotFound('%s.//a' % span)
a = select(span, 'a', 1)
url = a.attrib['href']
_id = re.sub(r'/videos/(.+)\.html', r'\1', url)
thumbnail_url = span.find('.//img').attrib['src']
title1_selector = 'span#title1'
title1 = span.cssselect(title1_selector)
if title1 is None:
raise ExpectedElementNotFound(title1_selector)
title = title1[0].text.strip()
selector = 'span#title1'
title_el = select(span, 'span#title1', 1)
title = title_el.text.strip()
thumbtime = span.cssselect('span.thumbtime')
minutes = seconds = 0
if thumbtime is not None:
time_span = thumbtime[0].find('span')
minutes, seconds = (int(v) for v in time_span.text.strip().split(':'))
time_span = select(span, 'span.thumbtime span', 1)
minutes, seconds = (int(v) for v in time_span.text.strip().split(':'))
yield YoujizzVideo(_id,
title=title,

View file

@ -20,7 +20,7 @@ import re
import datetime
from logging import warning
from weboob.tools.browser import ExpectedElementNotFound
from weboob.tools.parsers.lxmlparser import select
from .base import PornPage
from ..video import YoupornVideo
@ -42,11 +42,7 @@ class VideoPage(PornPage):
return el[0].cssselect('a')[0].attrib['href']
def get_title(self):
selector = '#videoArea h1'
try:
element = self.document.getroot().cssselect(selector)[0]
except IndexError:
raise ExpectedElementNotFound(selector)
element = select(self.document.getroot(), '#videoArea h1', 1)
return unicode(element.getchildren()[0].tail).strip()
DATE_REGEXP = re.compile("\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)")

View file

@ -18,7 +18,8 @@
import re
from weboob.tools.browser import BasePage, ExpectedElementNotFound
from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select
from .video import YoutubeVideo
@ -32,11 +33,7 @@ class ForbiddenVideo(Exception):
class ForbiddenVideoPage(BasePage):
def on_loaded(self):
selector = '.yt-alert-content'
try:
element = self.document.getroot().cssselect(selector)[0]
except IndexError:
raise ExpectedElementNotFound(selector)
element = select(self.document.getroot(), '.yt-alert-content', 1)
raise ForbiddenVideo(element.text.strip())
@ -57,19 +54,11 @@ class VideoPage(BasePage):
)
def get_author(self):
selector = 'a.watch-description-username strong'
try:
element = self.document.getroot().cssselect(selector)[0]
except IndexError:
raise ExpectedElementNotFound(selector)
element = select(self.document.getroot(), 'a.watch-description-username strong', 1)
return element.text.strip()
def get_title(self):
selector = 'meta[name=title]'
try:
element = self.document.getroot().cssselect(selector)[0]
except IndexError:
raise ExpectedElementNotFound(selector)
element = select(self.document.getroot(), 'meta[name=title]', 1)
return unicode(element.attrib['content']).strip()
def get_url(self, _id):