# Patch summary (free-text header; patch tools skip text before the first "diff --git").
#
# NOTE: the line breaks of this unified diff have been collapsed, so each physical
# line below holds many diff lines ("-" removed, "+" added, otherwise context).
#
# modules/youporn/pages/index.py
#   * Adds "import re".
#   * IndexPage.iter_videos: iterates xpath '//ul/li[@class="videoBox"]' instead of
#     walking the <li> children of cssselect("ul[class=clearfix]").
#   * Duration is now read from 'h2[class=duration]': the text is split on ':' and
#     unpacked as hours:minutes:seconds (3 parts) or minutes:seconds (2 parts); the
#     timedelta passed to YoupornVideo gains an hours= argument.
#   * Rating is now taken from the 'star-(\d)' fragment of the class attribute of
#     'div.stars', with rating_max set to 5 (previously parsed from 'div[class=rating]').
#
# modules/youporn/pages/video.py
#   * Adds imports of dateutil.parser.parse (as parse_dt) and BrokenPageError.
#   * VideoPage.get_url: selects 'div#tab-general-download ul li' and raises
#     BrokenPageError('Unable to find file URL') when nothing matches; the link is
#     taken from the first matched element.
#   * VideoPage.get_title: reads the text of '#videoCanvas h1' (was '#videoArea h1').
#   * Removes the DATE_REGEXP and MONTH2I class attributes.
#   * VideoPage.set_details: iterates 'div#tab-general-details ul li', reads the label
#     from <b> instead of <span>, parses 'Duration:' with a regex that understands
#     'hrs'/'min', and parses 'Date:' with parse_dt(value).
#
# NOTE(review): the hunks elide part of get_url and set_details, so these notes cover
#   only the lines visible here.
# NOTE(review): in '((\d+)hrs)?((\d+)min)?(\d+)?' every group is optional, so
#   re.match() always returns a match object (possibly empty); the "if not m" branch
#   raising BrokenPageError looks unreachable -- confirm the intended pattern.
# NOTE(review): iter_videos calls h1.find('a') right after h1 = li.find('h1') with no
#   None check; presumably every videoBox <li> has an <h1> -- verify.
# NOTE(review): the regex literals containing \d / \w are not raw strings.
# NOTE(review): get_title calls .decode('utf-8') on element.text, which assumes a
#   byte string (Python 2) -- confirm what the parser returns.
diff --git a/modules/youporn/pages/index.py b/modules/youporn/pages/index.py index 22efea65..42cae84d 100644 --- a/modules/youporn/pages/index.py +++ b/modules/youporn/pages/index.py @@ -18,6 +18,7 @@ # along with weboob. If not, see . +import re import datetime from .base import PornPage @@ -29,47 +30,45 @@ __all__ = ['IndexPage'] class IndexPage(PornPage): def iter_videos(self): - uls = self.document.getroot().cssselect("ul[class=clearfix]") - if not uls: - return + for li in self.document.getroot().xpath('//ul/li[@class="videoBox"]'): + a = li.find('a') + if a is None or a.find('img') is None: + continue - for ul in uls: - for li in ul.findall('li'): - a = li.find('a') - if a is None or a.find('img') is None: - continue + thumbnail_url = a.find('img').attrib['src'] - thumbnail_url = a.find('img').attrib['src'] + h1 = li.find('h1') + a = h1.find('a') + if a is None: + continue - h1 = li.find('h1') - a = h1.find('a') - if a is None: - continue + url = a.attrib['href'] + _id = url[len('/watch/'):] + _id = _id[:_id.find('/')] + title = a.text.strip() - url = a.attrib['href'] - _id = url[len('/watch/'):] - _id = _id[:_id.find('/')] - title = a.text.strip() + hours = minutes = seconds = 0 + div = li.cssselect('h2[class=duration]') + if len(div) > 0: + pack = [int(s) for s in div[0].text.strip().split(':')] + if len(pack) == 3: + hours, minutes, seconds = pack + elif len(pack) == 2: + minutes, seconds = pack - minutes = seconds = 0 - div = li.cssselect('div[class=duration_views]') - if div: - h2 = div[0].find('h2') - minutes = int(h2.text.strip()) - seconds = int(h2.find('span').tail.strip()) + rating = 0 + rating_max = 0 + div = li.cssselect('div.stars') + if div: + m = re.match('.*star-(\d).*', div[0].attrib.get('class', '')) + if m: + rating = int(m.group(1)) + rating_max = 5 - rating = 0 - rating_max = 0 - div = li.cssselect('div[class=rating]') - if div: - p = div[0].find('p') - rating = float(p.text.strip()) - rating_max = float(p.find('span').text.strip()[2:]) - 
- yield YoupornVideo(int(_id), - title=title, - rating=rating, - rating_max=rating_max, - duration=datetime.timedelta(minutes=minutes, seconds=seconds), - thumbnail_url=thumbnail_url, - ) + yield YoupornVideo(int(_id), + title=title, + rating=rating, + rating_max=rating_max, + duration=datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds), + thumbnail_url=thumbnail_url, + ) diff --git a/modules/youporn/pages/video.py b/modules/youporn/pages/video.py index 03d1f163..cf9337cb 100644 --- a/modules/youporn/pages/video.py +++ b/modules/youporn/pages/video.py @@ -20,8 +20,9 @@ import re import datetime +from dateutil.parser import parse as parse_dt - +from weboob.tools.browser import BrokenPageError from .base import PornPage from ..video import YoupornVideo @@ -39,8 +40,11 @@ class VideoPage(PornPage): return video def get_url(self): - download_div = self.parser.select(self.document.getroot(), '#download', 1) - a = self.parser.select(download_div, 'a', 1) + download_div = self.parser.select(self.document.getroot(), 'div#tab-general-download ul li') + if len(download_div) < 1: + raise BrokenPageError('Unable to find file URL') + + a = self.parser.select(download_div[0], 'a', 1) m = re.match('^(\w+) - .*', a.text) if m: ext = m.group(1).lower() @@ -49,27 +53,25 @@ class VideoPage(PornPage): return a.attrib['href'], ext def get_title(self): - element = self.parser.select(self.document.getroot(), '#videoArea h1', 1) - return unicode(element.getchildren()[0].tail).strip() - - DATE_REGEXP = re.compile("\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)") - MONTH2I = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + element = self.parser.select(self.document.getroot(), '#videoCanvas h1', 1) + return element.text.strip().decode('utf-8') def set_details(self, v): - details_div = self.parser.select(self.document.getroot(), '#details', 1) - for li in details_div.getiterator('li'): - span = li.find('span') + for li in 
self.parser.select(self.document.getroot(), 'div#tab-general-details ul li'): + span = li.find('b') name = span.text.strip() value = span.tail.strip() if name == 'Duration:': - seconds = minutes = 0 - for word in value.split(): - if word.endswith('min'): - minutes = int(word[:word.find('min')]) - elif word.endswith('sec'): - seconds = int(word[:word.find('sec')]) - v.duration = datetime.timedelta(minutes=minutes, seconds=seconds) + m = re.match('((\d+)hrs)?((\d+)min)?(\d+)?', value) + if not m: + raise BrokenPageError('Unable to parse datetime: %r' % value) + hours = m.group(2) or 0 + minutes = m.group(4) or 0 + seconds = m.group(5) or 0 + v.duration = datetime.timedelta(hours=int(hours), + minutes=int(minutes), + seconds=int(seconds)) elif name == 'Submitted:': author = li.find('i') if author is None: @@ -83,12 +85,4 @@ class VideoPage(PornPage): v.rating = float(r[0]) v.rating_max = float(r[2]) elif name == 'Date:': - m = self.DATE_REGEXP.match(value) - if m: - month = self.MONTH2I.index(m.group(1)) - day = int(m.group(2)) - hour = int(m.group(3)) - minute = int(m.group(4)) - second = int(m.group(5)) - year = int(m.group(6)) - v.date = datetime.datetime(year, month, day, hour, minute, second) + v.date = parse_dt(value)