[youporn] rewrite with Browser 2
This commit is contained in:
parent
83fe30af8b
commit
2d0df1b37a
5 changed files with 59 additions and 170 deletions
|
|
@ -18,40 +18,36 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.deprecated.browser import Browser
|
||||
from weboob.deprecated.browser.decorators import id2url
|
||||
from weboob.browser import PagesBrowser, URL
|
||||
|
||||
from .pages.index import IndexPage
|
||||
from .pages.video import VideoPage
|
||||
from .video import YoupornVideo
|
||||
|
||||
|
||||
__all__ = ['YoupornBrowser']
|
||||
|
||||
|
||||
class YoupornBrowser(Browser):
|
||||
DOMAIN = 'www.youporn.com'
|
||||
ENCODING = None
|
||||
PAGES = {r'http://[w\.]*youporn\.com/?': IndexPage,
|
||||
r'http://[w\.]*youporn\.com/search.*': IndexPage,
|
||||
r'http://[w\.]*youporn\.com/watch/(?P<id>\d+)/?.*': VideoPage,
|
||||
r'http://[w\.]*youporngay\.com:80/watch/(?P<id>.+)': VideoPage,
|
||||
}
|
||||
class YoupornBrowser(PagesBrowser):
|
||||
BASEURL = 'http://www.youporn.com'
|
||||
|
||||
@id2url(YoupornVideo.id2url)
|
||||
def get_video(self, url, video=None):
|
||||
self.location(url)
|
||||
return self.page.get_video(video)
|
||||
home = URL('/$', IndexPage)
|
||||
# Raw string so the regex escape `\?` (a literal question mark) is not treated as a string escape.
search = URL(r'/search/\?query=(?P<query>.*)', IndexPage)
|
||||
video = URL('/watch/(?P<id>[0-9]+)/.*', VideoPage)
|
||||
|
||||
def get_video(self, _id):
|
||||
self.video.go(id=_id)
|
||||
assert self.video.is_here()
|
||||
return self.page.get_video()
|
||||
|
||||
def search_videos(self, pattern, sortby):
|
||||
if pattern == 'a' or pattern == 'i':
|
||||
raise ValueError('this pattern is not supported');
|
||||
|
||||
self.location(self.buildurl('/search/%s' % sortby, query=pattern.encode('utf-8')))
|
||||
assert self.is_on_page(IndexPage)
|
||||
self.search.go(query=pattern)
|
||||
assert self.search.is_here()
|
||||
return self.page.iter_videos()
|
||||
|
||||
def latest_videos(self):
|
||||
self.home()
|
||||
assert self.is_on_page(IndexPage)
|
||||
self.home.go()
|
||||
assert self.home.is_here()
|
||||
return self.page.iter_videos()
|
||||
|
|
|
|||
|
|
@ -39,27 +39,18 @@ class YoupornModule(Module, CapVideo, CapCollection):
|
|||
BROWSER = YoupornBrowser
|
||||
|
||||
def get_video(self, _id):
|
||||
with self.browser:
|
||||
return self.browser.get_video(_id)
|
||||
return self.browser.get_video(_id)
|
||||
|
||||
SORTBY = ['relevance', 'rating', 'views', 'time']
|
||||
|
||||
def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
|
||||
if not nsfw:
|
||||
return set()
|
||||
with self.browser:
|
||||
return self.browser.search_videos(pattern, self.SORTBY[sortby])
|
||||
|
||||
return self.browser.search_videos(pattern, self.SORTBY[sortby])
|
||||
|
||||
def fill_video(self, video, fields):
|
||||
if fields != ['thumbnail']:
|
||||
# if more than just the thumbnail is requested, we probably want every field as well
|
||||
with self.browser:
|
||||
video = self.browser.get_video(YoupornVideo.id2url(video.id), video)
|
||||
if 'thumbnail' in fields and video.thumbnail:
|
||||
with self.browser:
|
||||
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
|
||||
|
||||
return video
|
||||
return self.browser.get_video(video.id)
|
||||
|
||||
def iter_resources(self, objs, split_path):
|
||||
if BaseVideo in objs:
|
||||
|
|
|
|||
|
|
@ -1,34 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.deprecated.mech import ClientForm
|
||||
ControlNotFoundError = ClientForm.ControlNotFoundError
|
||||
|
||||
from mechanize import FormNotFoundError
|
||||
from weboob.deprecated.browser import Page
|
||||
|
||||
|
||||
class PornPage(Page):
|
||||
def on_loaded(self):
|
||||
try:
|
||||
self.browser.select_form(nr=0)
|
||||
self.browser.submit(name='user_choice')
|
||||
return False
|
||||
except (ControlNotFoundError, FormNotFoundError):
|
||||
return True
|
||||
|
|
@ -18,51 +18,35 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
import datetime
|
||||
|
||||
from weboob.browser.pages import HTMLPage
|
||||
from weboob.browser.elements import ItemElement, ListElement, method
|
||||
from weboob.browser.filters.html import Attr, CSS
|
||||
from weboob.browser.filters.standard import CleanText, Duration, Regexp, Type
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
from weboob.capabilities.image import BaseImage
|
||||
|
||||
from .base import PornPage
|
||||
from ..video import YoupornVideo
|
||||
|
||||
|
||||
class IndexPage(PornPage):
|
||||
def iter_videos(self):
|
||||
for li in self.document.getroot().xpath('//ul/li[@class="videoBox"]'):
|
||||
a = li.find('div').find('a')
|
||||
if a is None or a.find('img') is None:
|
||||
continue
|
||||
class IndexPage(HTMLPage):
|
||||
@method
|
||||
class iter_videos(ListElement):
|
||||
item_xpath = '//div[@id="content"]/div/div/ul/li/div/a'
|
||||
|
||||
thumbnail_url = a.find('img').attrib['src']
|
||||
class item(ItemElement):
|
||||
klass = YoupornVideo
|
||||
|
||||
a = self.parser.select(li, './/a[@class="videoTitle"]', 1, 'xpath')
|
||||
def obj_thumbnail(self):
|
||||
thumbnail_url = Attr('./img', 'src')(self)
|
||||
thumbnail = BaseImage(thumbnail_url)
|
||||
thumbnail.url = thumbnail.id
|
||||
return thumbnail
|
||||
|
||||
url = a.attrib['href']
|
||||
_id = url[len('/watch/'):]
|
||||
_id = _id[:_id.find('/')]
|
||||
|
||||
video = YoupornVideo(int(_id))
|
||||
video.title = unicode(a.text.strip())
|
||||
video.thumbnail = BaseImage(thumbnail_url)
|
||||
video.thumbnail.url = video.thumbnail.id
|
||||
|
||||
hours = minutes = seconds = 0
|
||||
div = li.cssselect('div.duration')
|
||||
if len(div) > 0:
|
||||
pack = [int(s) for s in div[0].text.strip().split(':')]
|
||||
if len(pack) == 3:
|
||||
hours, minutes, seconds = pack
|
||||
elif len(pack) == 2:
|
||||
minutes, seconds = pack
|
||||
|
||||
video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
|
||||
|
||||
div = li.cssselect('div.rating')
|
||||
if div:
|
||||
video.rating = int(div[0].text.strip('% '))
|
||||
video.rating_max = 100
|
||||
|
||||
video.set_empty_fields(NotAvailable, ('url', 'author'))
|
||||
|
||||
yield video
|
||||
obj_author = NotAvailable
|
||||
obj_duration = CSS('span.duration') & CleanText() & Duration()
|
||||
obj_id = Attr('../..', 'data-video-id')
|
||||
# Capture every digit before '%' so values such as "100%" or "5%" are parsed in full.
obj_rating = CleanText('./span/i') & Regexp(pattern=r'(\d+)%') & Type(type=int)
|
||||
obj_rating_max = 100
|
||||
obj_title = CleanText('./p')
|
||||
obj_url = NotAvailable
|
||||
|
|
|
|||
|
|
@ -18,74 +18,26 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
import re
|
||||
import datetime
|
||||
from dateutil.parser import parse as parse_dt
|
||||
|
||||
from weboob.browser.pages import HTMLPage
|
||||
from weboob.browser.elements import ItemElement, method
|
||||
from weboob.browser.filters.html import Link
|
||||
from weboob.browser.filters.standard import CleanText, Regexp, Type
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
from weboob.deprecated.browser import BrokenPageError
|
||||
|
||||
from .base import PornPage
|
||||
from ..video import YoupornVideo
|
||||
|
||||
|
||||
class VideoPage(PornPage):
|
||||
def get_video(self, video=None):
|
||||
if not PornPage.on_loaded(self):
|
||||
return
|
||||
if video is None:
|
||||
video = YoupornVideo(self.group_dict['id'])
|
||||
video.title = self.get_title()
|
||||
video.url, video.ext = self.get_url()
|
||||
self.set_details(video)
|
||||
class VideoPage(HTMLPage):
|
||||
@method
|
||||
class get_video(ItemElement):
|
||||
klass = YoupornVideo
|
||||
|
||||
video.set_empty_fields(NotAvailable)
|
||||
return video
|
||||
|
||||
def get_url(self):
|
||||
download_div = self.parser.select(self.document.getroot(), 'ul.downloadList li')
|
||||
if len(download_div) < 1:
|
||||
raise BrokenPageError('Unable to find file URL')
|
||||
|
||||
a = self.parser.select(download_div[0], 'a', 1)
|
||||
m = re.match('^(\w+) - .*', a.text)
|
||||
if m:
|
||||
ext = m.group(1).lower()
|
||||
else:
|
||||
ext = u'flv'
|
||||
return unicode(a.attrib['href']), unicode(ext)
|
||||
|
||||
def get_title(self):
|
||||
element = self.parser.select(self.document.getroot(), 'h1', 1)
|
||||
return element.text.strip().decode('utf-8')
|
||||
|
||||
def set_details(self, v):
|
||||
for li in self.parser.select(self.document.getroot(), 'ul.spaced li'):
|
||||
span = li.find('label')
|
||||
name = span.text.strip()
|
||||
value = span.tail.strip()
|
||||
|
||||
if name == 'Duration:':
|
||||
m = re.match('((\d+)hrs)?\s*((\d+)min)?\s*((\d+)sec)?', value)
|
||||
if not m:
|
||||
raise BrokenPageError('Unable to parse datetime: %r' % value)
|
||||
hours = m.group(2) or 0
|
||||
minutes = m.group(4) or 0
|
||||
seconds = m.group(6) or 0
|
||||
v.duration = datetime.timedelta(hours=int(hours),
|
||||
minutes=int(minutes),
|
||||
seconds=int(seconds))
|
||||
elif name == 'Submitted:':
|
||||
author = li.find('i')
|
||||
if author is None:
|
||||
author = li.find('a')
|
||||
if author is None:
|
||||
v.author = unicode(value)
|
||||
else:
|
||||
v.author = unicode(author.text)
|
||||
elif name == 'Rating:':
|
||||
value = li.find('span').text
|
||||
v.rating = int(value.rstrip('%'))
|
||||
v.rating_max = 100
|
||||
elif name == 'Date:':
|
||||
v.date = parse_dt(value)
|
||||
obj_author = CleanText('//div[@class="author-block--line"][1]') & Regexp(pattern=r'By: (.*)')
|
||||
#obj_date = Date('//div[@id="stats-date"]')
|
||||
obj_duration = NotAvailable
|
||||
obj_ext = 'mp4'
|
||||
# Capture every digit before '%' so values such as "100%" or "5%" are parsed in full.
obj_rating = CleanText('//div[@class="rating-percentage"]') & Regexp(pattern=r'(\d+)%') & Type(type=int)
|
||||
obj_rating_max = 100
|
||||
obj_thumbnail = NotAvailable
|
||||
obj_title = CleanText('//h1')
|
||||
obj_url = Link('//ul[@class="downloadList"]/li[2]/a')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue