support repositories to manage backends (closes #747)

This commit is contained in:
Romain Bignon 2012-01-03 12:10:21 +01:00
commit 14a7a1d362
410 changed files with 1079 additions and 297 deletions

View file

@ -0,0 +1,3 @@
from .backend import YoupornBackend
__all__ = ['YoupornBackend']

View file

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
from weboob.capabilities.video import ICapVideo
from weboob.tools.backend import BaseBackend
from .browser import YoupornBrowser
from .video import YoupornVideo
__all__ = ['YoupornBackend']
class YoupornBackend(BaseBackend, ICapVideo):
    """Video backend for the Youporn website, driven by YoupornBrowser."""

    NAME = 'youporn'
    MAINTAINER = 'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '0.a'
    DESCRIPTION = 'Youporn videos website'
    LICENSE = 'AGPLv3+'
    BROWSER = YoupornBrowser

    # Sort keywords handed to the browser, indexed by the ``sortby`` value
    # received by iter_search_results().
    SORTBY = ['relevance', 'rating', 'views', 'time']

    def get_video(self, _id):
        """Return what the browser's get_video() yields for ``_id``."""
        with self.browser:
            return self.browser.get_video(_id)

    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
        """Search videos matching ``pattern``.

        Nothing is fetched unless ``nsfw`` is true: an empty set is returned
        instead. ``sortby`` is used as an index into SORTBY. ``max_results``
        is accepted but not used by this method.
        """
        if nsfw:
            with self.browser:
                return self.browser.iter_search_results(pattern, self.SORTBY[sortby])
        else:
            return set()

    def fill_video(self, video, fields):
        """Load the requested ``fields`` of ``video`` and return the video."""
        only_thumbnail = fields == ['thumbnail']
        if not only_thumbnail:
            # Anything beyond the bare thumbnail requires the video page,
            # which provides all the other fields at once.
            with self.browser:
                video = self.browser.get_video(YoupornVideo.id2url(video.id), video)

        if 'thumbnail' in fields:
            if video.thumbnail:
                with self.browser:
                    video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    # Maps an object class to the method able to fill it.
    OBJECTS = {YoupornVideo: fill_video}

View file

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url
from .pages.index import IndexPage
from .pages.video import VideoPage
from .video import YoupornVideo
__all__ = ['YoupornBrowser']
class YoupornBrowser(BaseBrowser):
    """Browser for youporn.com, mapping site URLs to their page classes."""

    DOMAIN = 'youporn.com'
    ENCODING = None
    # URL pattern -> page class used to handle the matching location.
    PAGES = {
        r'http://[w\.]*youporn\.com/?': IndexPage,
        r'http://[w\.]*youporn\.com/search.*': IndexPage,
        r'http://[w\.]*youporn\.com/watch/(?P<id>\d+)/?.*': VideoPage,
        r'http://[w\.]*youporngay\.com:80/watch/(?P<id>.+)': VideoPage,
    }

    # NOTE(review): the decorator presumably turns a bare id into a URL
    # through YoupornVideo.id2url before the method runs -- confirm.
    @id2url(YoupornVideo.id2url)
    def get_video(self, url, video=None):
        """Go to ``url`` and return the video built by the resulting page."""
        self.location(url)
        current_page = self.page
        return current_page.get_video(video)

    def iter_search_results(self, pattern, sortby):
        """Return the videos listed for ``pattern``, sorted by ``sortby``.

        Without a pattern the home page is listed instead of a search.
        """
        if pattern:
            search_path = '/search/%s' % sortby
            encoded_query = pattern.encode('utf-8')
            self.location(self.buildurl(search_path, query=encoded_query))
        else:
            self.home()

        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()

BIN
modules/youporn/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

View file

View file

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.mech import ClientForm
ControlNotFoundError = ClientForm.ControlNotFoundError
from mechanize import FormNotFoundError
from weboob.tools.browser import BasePage
__all__ = ['PornPage']
class PornPage(BasePage):
    """Base page that submits the first form's 'user_choice' control if present."""

    def on_loaded(self):
        """Submit the 'user_choice' control of the first form of the page.

        Returns False when the form was found and submitted, True when the
        form or the control does not exist.
        """
        # NOTE(review): presumably this is the site's age-confirmation
        # form -- confirm.
        try:
            self.browser.select_form(nr=0)
            self.browser.submit(name='user_choice')
        except (ControlNotFoundError, FormNotFoundError):
            return True
        return False

View file

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
from .base import PornPage
from ..video import YoupornVideo
__all__ = ['IndexPage']
class IndexPage(PornPage):
    """Listing page (home or search results) from which videos are scraped."""

    def iter_videos(self):
        """Yield a YoupornVideo for each usable entry of the page.

        Entries are the ``<li>`` children of every ``<ul>`` whose class
        attribute is exactly ``clearfix``. An entry is skipped when it has
        no thumbnail link, or no ``<h1>`` holding a title link. Duration and
        rating default to zero when their ``<div>`` is absent.
        """
        for ul in self.document.getroot().cssselect("ul[class=clearfix]"):
            for li in ul.findall('li'):
                thumb_link = li.find('a')
                if thumb_link is None:
                    continue
                img = thumb_link.find('img')
                if img is None:
                    continue
                thumbnail_url = img.attrib['src']

                # An entry without <h1> is skipped like one without a title
                # link, instead of raising AttributeError on None.
                h1 = li.find('h1')
                if h1 is None:
                    continue
                title_link = h1.find('a')
                if title_link is None:
                    continue

                url = title_link.attrib['href']
                # The id is the path segment following '/watch/'.
                # partition() keeps the whole segment when no further '/'
                # follows, where slicing on find() == -1 would drop the
                # last character of the id.
                _id = url[len('/watch/'):].partition('/')[0]
                title = title_link.text.strip()

                minutes = seconds = 0
                duration_divs = li.cssselect('div[class=duration_views]')
                if duration_divs:
                    h2 = duration_divs[0].find('h2')
                    # Minutes are the text of <h2>; seconds follow its <span>.
                    minutes = int(h2.text.strip())
                    seconds = int(h2.find('span').tail.strip())

                rating = 0
                rating_max = 0
                rating_divs = li.cssselect('div[class=rating]')
                if rating_divs:
                    p = rating_divs[0].find('p')
                    rating = float(p.text.strip())
                    # The first two characters of the <span> text are
                    # dropped before parsing the maximum
                    # (presumably a "/ " separator -- TODO confirm).
                    rating_max = float(p.find('span').text.strip()[2:])

                yield YoupornVideo(int(_id),
                                   title=title,
                                   rating=rating,
                                   rating_max=rating_max,
                                   duration=datetime.timedelta(minutes=minutes, seconds=seconds),
                                   thumbnail_url=thumbnail_url,
                                   )

View file

@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime
from .base import PornPage
from ..video import YoupornVideo
class VideoPage(PornPage):
    """Page of a single video, from which its metadata is scraped."""

    # Matches dates shaped like "<word> <month> <day> <h>:<m>:<s> <year>".
    DATE_REGEXP = re.compile(r"\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)")
    # Month abbreviation -> month number, through list.index(); the empty
    # first entry makes 'Jan' land on index 1.
    MONTH2I = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def get_video(self, video=None):
        """Fill ``video`` from this page and return it.

        A new YoupornVideo is created from the 'id' group of the page URL
        when ``video`` is None. Returns None when PornPage.on_loaded()
        reports False.
        """
        if not PornPage.on_loaded(self):
            return None
        if video is None:
            video = YoupornVideo(self.group_dict['id'])
        video.title = self.get_title()
        video.url, video.ext = self.get_url()
        self.set_details(video)
        return video

    def get_url(self):
        """Return ``(url, ext)`` taken from the link inside ``#download``.

        The extension is the lowercased leading word of a link text shaped
        like "<word> - ...", and 'flv' when the text does not have that
        shape or the link has no text.
        """
        download_div = self.parser.select(self.document.getroot(), '#download', 1)
        a = self.parser.select(download_div, 'a', 1)
        # a.text is None for a link without direct text; treat it as
        # "no match" rather than handing None to re.match().
        m = re.match(r'^(\w+) - .*', a.text or '')
        if m:
            ext = m.group(1).lower()
        else:
            ext = 'flv'
        return a.attrib['href'], ext

    def get_title(self):
        """Return the text following the first child of ``#videoArea h1``."""
        element = self.parser.select(self.document.getroot(), '#videoArea h1', 1)
        return unicode(element.getchildren()[0].tail).strip()

    def set_details(self, v):
        """Set duration, author, rating and date of ``v`` from ``#details``.

        Each ``<li>`` is read as a ``<span>`` label followed by its value.
        Items without a ``<span>`` are ignored, and unknown labels are
        skipped.
        """
        details_div = self.parser.select(self.document.getroot(), '#details', 1)
        for li in details_div.getiterator('li'):
            span = li.find('span')
            if span is None:
                continue
            # text/tail are None when empty; the value may live entirely in
            # a child element (see 'Submitted:'), leaving no tail at all.
            name = (span.text or '').strip()
            value = (span.tail or '').strip()

            if name == 'Duration:':
                seconds = minutes = 0
                for word in value.split():
                    if word.endswith('min'):
                        minutes = int(word[:word.find('min')])
                    elif word.endswith('sec'):
                        seconds = int(word[:word.find('sec')])
                v.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
            elif name == 'Submitted:':
                # Prefer the name held by an <i> or <a> child, and fall
                # back on the raw text following the label.
                author = li.find('i')
                if author is None:
                    author = li.find('a')
                if author is None:
                    v.author = value
                else:
                    v.author = author.text
            elif name == 'Rating:':
                # First and third words are read as rating and maximum;
                # shorter values are left alone instead of raising.
                r = value.split()
                if len(r) >= 3:
                    v.rating = float(r[0])
                    v.rating_max = float(r[2])
            elif name == 'Date:':
                m = self.DATE_REGEXP.match(value)
                if m:
                    month = self.MONTH2I.index(m.group(1))
                    day = int(m.group(2))
                    hour = int(m.group(3))
                    minute = int(m.group(4))
                    second = int(m.group(5))
                    year = int(m.group(6))
                    v.date = datetime.datetime(year, month, day, hour, minute, second)

34
modules/youporn/test.py Normal file
View file

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
class YoupornTest(BackendTest):
    """Functional test of the 'youporn' backend."""

    BACKEND = 'youporn'

    def test_youporn(self):
        """Check the nsfw filter, then search, fill and open a video."""
        # Without nsfw enabled the search must come back empty.
        filtered = self.backend.iter_search_results('penis', nsfw=False)
        self.assertTrue(len(filtered) == 0)

        videos = list(self.backend.iter_search_results('ass to mouth', nsfw=True))
        self.assertTrue(len(videos) > 0)

        first = videos[0]
        self.backend.fillobj(first, ('url',))
        url_is_http = first.url and first.url.startswith('http://')
        self.assertTrue(url_is_http, 'URL for video "%s" not found: %s' % (first.id, first.url))
        self.backend.browser.openurl(first.url)

38
modules/youporn/video.py Normal file
View file

@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
__all__ = ['YoupornVideo']
class YoupornVideo(BaseVideo):
    """BaseVideo flagged as NSFW, with 'flv' as its initial extension."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to BaseVideo, then set nsfw and ext."""
        BaseVideo.__init__(self, *args, **kwargs)
        self.nsfw = True
        self.ext = 'flv'

    @classmethod
    def id2url(cls, _id):
        """Return the watch URL for an all-digit ``_id``, None otherwise."""
        if not _id.isdigit():
            return None
        return 'http://www.youporn.com/watch/%d' % int(_id)