new select() helper
This commit is contained in:
parent
eb026b7c3c
commit
b4c672fa46
6 changed files with 67 additions and 51 deletions
|
|
@ -18,7 +18,6 @@
|
|||
|
||||
import re
|
||||
|
||||
from weboob.tools.browser import ExpectedElementNotFound
|
||||
from weboob.backends.aum.pages.base import PageBase
|
||||
from logging import error
|
||||
|
||||
|
|
@ -47,4 +46,5 @@ class HomePage(PageBase):
|
|||
i += 1
|
||||
if i == 3:
|
||||
return int(font.firstChild.data)
|
||||
raise ExpectedElementNotFound(u'Could not parse number of charms available')
|
||||
logging.warning(u'Could not parse number of charms available')
|
||||
return 0
|
||||
|
|
|
|||
|
|
@ -16,9 +16,11 @@
|
|||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from weboob.tools.browser import BasePage, ExpectedElementNotFound
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.tools.parsers.lxmlparser import select
|
||||
|
||||
from ..video import YoujizzVideo
|
||||
|
||||
|
|
@ -28,31 +30,20 @@ __all__ = ['IndexPage']
|
|||
|
||||
class IndexPage(BasePage):
|
||||
def iter_videos(self):
|
||||
div_id = 'span#miniatura'
|
||||
span_list = self.document.getroot().cssselect(div_id)
|
||||
if not span_list:
|
||||
raise ExpectedElementNotFound(div_id)
|
||||
|
||||
span_list = select(self.document.getroot(), 'span#miniatura')
|
||||
for span in span_list:
|
||||
a = span.find('.//a')
|
||||
if a is None:
|
||||
raise ExpectedElementNotFound('%s.//a' % span)
|
||||
a = select(span, 'a', 1)
|
||||
url = a.attrib['href']
|
||||
_id = re.sub(r'/videos/(.+)\.html', r'\1', url)
|
||||
|
||||
thumbnail_url = span.find('.//img').attrib['src']
|
||||
|
||||
title1_selector = 'span#title1'
|
||||
title1 = span.cssselect(title1_selector)
|
||||
if title1 is None:
|
||||
raise ExpectedElementNotFound(title1_selector)
|
||||
title = title1[0].text.strip()
|
||||
selector = 'span#title1'
|
||||
title_el = select(span, 'span#title1', 1)
|
||||
title = title_el.text.strip()
|
||||
|
||||
thumbtime = span.cssselect('span.thumbtime')
|
||||
minutes = seconds = 0
|
||||
if thumbtime is not None:
|
||||
time_span = thumbtime[0].find('span')
|
||||
minutes, seconds = (int(v) for v in time_span.text.strip().split(':'))
|
||||
time_span = select(span, 'span.thumbtime span', 1)
|
||||
minutes, seconds = (int(v) for v in time_span.text.strip().split(':'))
|
||||
|
||||
yield YoujizzVideo(_id,
|
||||
title=title,
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ import re
|
|||
import datetime
|
||||
from logging import warning
|
||||
|
||||
from weboob.tools.browser import ExpectedElementNotFound
|
||||
from weboob.tools.parsers.lxmlparser import select
|
||||
|
||||
from .base import PornPage
|
||||
from ..video import YoupornVideo
|
||||
|
|
@ -42,11 +42,7 @@ class VideoPage(PornPage):
|
|||
return el[0].cssselect('a')[0].attrib['href']
|
||||
|
||||
def get_title(self):
|
||||
selector = '#videoArea h1'
|
||||
try:
|
||||
element = self.document.getroot().cssselect(selector)[0]
|
||||
except IndexError:
|
||||
raise ExpectedElementNotFound(selector)
|
||||
element = select(self.document.getroot(), '#videoArea h1', 1)
|
||||
return unicode(element.getchildren()[0].tail).strip()
|
||||
|
||||
DATE_REGEXP = re.compile("\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)")
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
|
||||
import re
|
||||
|
||||
from weboob.tools.browser import BasePage, ExpectedElementNotFound
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.tools.parsers.lxmlparser import select
|
||||
|
||||
from .video import YoutubeVideo
|
||||
|
||||
|
|
@ -32,11 +33,7 @@ class ForbiddenVideo(Exception):
|
|||
|
||||
class ForbiddenVideoPage(BasePage):
|
||||
def on_loaded(self):
|
||||
selector = '.yt-alert-content'
|
||||
try:
|
||||
element = self.document.getroot().cssselect(selector)[0]
|
||||
except IndexError:
|
||||
raise ExpectedElementNotFound(selector)
|
||||
element = select(self.document.getroot(), '.yt-alert-content', 1)
|
||||
raise ForbiddenVideo(element.text.strip())
|
||||
|
||||
|
||||
|
|
@ -57,19 +54,11 @@ class VideoPage(BasePage):
|
|||
)
|
||||
|
||||
def get_author(self):
|
||||
selector = 'a.watch-description-username strong'
|
||||
try:
|
||||
element = self.document.getroot().cssselect(selector)[0]
|
||||
except IndexError:
|
||||
raise ExpectedElementNotFound(selector)
|
||||
element = select(self.document.getroot(), 'a.watch-description-username strong', 1)
|
||||
return element.text.strip()
|
||||
|
||||
def get_title(self):
|
||||
selector = 'meta[name=title]'
|
||||
try:
|
||||
element = self.document.getroot().cssselect(selector)[0]
|
||||
except IndexError:
|
||||
raise ExpectedElementNotFound(selector)
|
||||
element = select(self.document.getroot(), 'meta[name=title]', 1)
|
||||
return unicode(element.attrib['content']).strip()
|
||||
|
||||
def get_url(self, _id):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue