support repositories to manage backends (closes #747)

This commit is contained in:
Romain Bignon 2012-01-03 12:10:21 +01:00
commit 14a7a1d362
410 changed files with 1079 additions and 297 deletions

View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
# Re-export the backend class as the public name of this package.
from .backend import YoujizzBackend
__all__ = ['YoujizzBackend']

View file

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
from weboob.capabilities.video import ICapVideo
from weboob.tools.backend import BaseBackend
from .browser import YoujizzBrowser
from .video import YoujizzVideo
__all__ = ['YoujizzBackend']
class YoujizzBackend(BaseBackend, ICapVideo):
    """Video backend for the Youjizz website.

    Every call into ``self.browser`` is made while the browser is entered
    as a context manager (``with self.browser:``).
    """

    NAME = 'youjizz'
    MAINTAINER = 'Roger Philibert'
    EMAIL = 'roger.philibert@gmail.com'
    VERSION = '0.a'
    DESCRIPTION = 'Youjizz videos website'
    LICENSE = 'AGPLv3+'
    BROWSER = YoujizzBrowser

    def get_video(self, _id):
        """Return the video that the browser builds for ``_id``."""
        with self.browser:
            return self.browser.get_video(_id)

    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
        """Search videos matching ``pattern``.

        This backend only serves NSFW content, so an empty set is returned
        unless ``nsfw`` is true. ``sortby`` and ``max_results`` are accepted
        for interface compatibility but are not used here.
        """
        if not nsfw:
            return set()
        with self.browser:
            return self.browser.iter_search_results(pattern)

    def fill_video(self, video, fields):
        """Complete ``video`` with the requested ``fields`` and return it.

        ``fields`` is an iterable of field names. The video page is fetched
        unless the thumbnail is the only field requested; the thumbnail
        data is downloaded when 'thumbnail' is requested and the video has
        a thumbnail object.
        """
        # Compare as sets so that a tuple such as ('thumbnail',) is treated
        # the same as the list ['thumbnail'] instead of forcing a page fetch.
        if set(fields) != set(['thumbnail']):
            # If more than just the thumbnail is wanted, we probably want
            # every field, so reload the whole video page.
            with self.browser:
                video = self.browser.get_video(YoujizzVideo.id2url(video.id), video)
        if 'thumbnail' in fields and video.thumbnail:
            with self.browser:
                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
        return video

    # Maps an object class to the method able to fill its missing fields.
    OBJECTS = {YoujizzVideo: fill_video}

View file

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url
from .pages.index import IndexPage
from .pages.video import VideoPage
from .video import YoujizzVideo
__all__ = ['YoujizzBrowser']
class YoujizzBrowser(BaseBrowser):
    """Browser for youjizz.com.

    ``PAGES`` maps URL regular expressions to the page class used for the
    matching location.
    """

    DOMAIN = 'youjizz.com'
    ENCODING = None
    PAGES = {
        r'http://.*youjizz\.com/?': IndexPage,
        # The dot of "index.php" is escaped so that it only matches a
        # literal '.', like the other patterns of this table.
        r'http://.*youjizz\.com/index\.php': IndexPage,
        r'http://.*youjizz\.com/search/(?P<pattern>.+)\.html': IndexPage,
        r'http://.*youjizz\.com/videos/(?P<id>.+)\.html': VideoPage,
    }

    # NOTE(review): the id2url decorator presumably lets callers pass either
    # a video id or a full URL -- confirm in weboob.tools.browser.decorators.
    @id2url(YoujizzVideo.id2url)
    def get_video(self, url, video=None):
        """Open ``url`` and return the video parsed from the video page.

        ``video`` is an optional existing object forwarded to the page's
        ``get_video``.
        """
        self.location(url)
        assert self.is_on_page(VideoPage), 'Should be on video page.'
        return self.page.get_video(video)

    def iter_search_results(self, pattern):
        """Return the videos listed for ``pattern``.

        With an empty ``pattern`` the home page listing is used; otherwise
        the first page of the site search for the UTF-8 encoded, URL-quoted
        pattern is loaded.
        """
        if pattern:
            quoted = urllib.quote_plus(pattern.encode('utf-8'))
            self.location('/search/%s-1.html' % quoted)
        else:
            self.home()
        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()

BIN
modules/youjizz/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

View file

View file

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import re
from weboob.tools.browser import BasePage
from weboob.tools.browser import BrokenPageError
from ..video import YoujizzVideo
__all__ = ['IndexPage']
class IndexPage(BasePage):
    """Listing page (home page or search results) showing video thumbnails."""

    def iter_videos(self):
        """Yield one ``YoujizzVideo`` per ``span#miniatura`` element.

        Each video carries its id, title, duration and thumbnail URL.
        The duration text may be 'N/A' (zero duration), 'MM:SS' or
        'H:MM:SS'; ';' is accepted in place of ':'.

        Raises BrokenPageError when the duration text has another shape.
        """
        for span in self.parser.select(self.document.getroot(), 'span#miniatura'):
            link = self.parser.select(span, 'a', 1)
            href = link.attrib['href']
            # Keep only the part between "/videos/" and ".html" as the id.
            _id = re.sub(r'/videos/(.+)\.html', r'\1', href)

            thumbnail_url = span.find('.//img').attrib['src']

            title = self.parser.select(span, 'span#title1', 1).text.strip()

            time_span = self.parser.select(span, 'span.thumbtime span', 1)
            time_txt = time_span.text.strip().replace(';', ':')
            if time_txt == 'N/A':
                total_seconds = 0
            elif ':' in time_txt:
                parts = [int(v) for v in time_txt.split(':')]
                if len(parts) > 3:
                    raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)
                # Fold [H,] M, S into seconds; for two fields this equals
                # timedelta(minutes=M, seconds=S).
                total_seconds = 0
                for part in parts:
                    total_seconds = total_seconds * 60 + part
            else:
                raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)

            yield YoujizzVideo(_id,
                               title=title,
                               duration=datetime.timedelta(seconds=total_seconds),
                               thumbnail_url=thumbnail_url,
                               )

View file

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import lxml.html
import re
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.misc import to_unicode
from ..video import YoujizzVideo
__all__ = ['VideoPage']
class VideoPage(BasePage):
    """Page of a single video."""

    def get_video(self, video=None):
        """Fill and return a ``YoujizzVideo`` from this page.

        ``video`` is an optional existing object to complete; when it is
        None a new one is created from the 'id' group of the page URL.
        Sets the title, the duration (``NotAvailable`` when the page says
        'Unknown') and the URL of the video file.

        Raises BrokenPageError when the duration cannot be found or parsed,
        or when the page does not contain exactly one video file URL.
        """
        _id = to_unicode(self.group_dict['id'])
        if video is None:
            video = YoujizzVideo(_id)

        title_el = self.parser.select(self.document.getroot(), 'title', 1)
        video.title = to_unicode(title_el.text.strip())

        # The site's HTML is too malformed to walk reliably, so the page is
        # serialized again and searched with regular expressions.
        data = lxml.html.tostring(self.document.getroot())

        m = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)<br.*>', data)
        if not m:
            raise BrokenPageError('Unable to retrieve video duration')
        txt = m.group(1).strip()
        if txt == 'Unknown':
            video.duration = NotAvailable
        else:
            parts = [int(v) for v in to_unicode(txt).split(':')]
            # Accept MM:SS and H:MM:SS; anything else is a broken page.
            if len(parts) not in (2, 3):
                raise BrokenPageError('Unable to parse the video duration: %s' % txt)
            total_seconds = 0
            for part in parts:
                total_seconds = total_seconds * 60 + part
            video.duration = datetime.timedelta(seconds=total_seconds)

        video_file_urls = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv)[\?"]', data)
        if not video_file_urls:
            raise BrokenPageError('Video URL not found')
        if len(video_file_urls) > 1:
            raise BrokenPageError('Many video file URL found')
        video.url = video_file_urls[0]

        return video

33
modules/youjizz/test.py Normal file
View file

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
class YoujizzTest(BackendTest):
    """Functional test of the youjizz backend."""

    BACKEND = 'youjizz'

    def test_youjizz(self):
        """Check NSFW filtering, searching and filling of the video URL."""
        # Materialize the result before measuring it, so the check does not
        # depend on the backend returning a sized container.
        filtered = list(self.backend.iter_search_results('anus', nsfw=False))
        self.assertEqual(len(filtered), 0)

        results = list(self.backend.iter_search_results('sex', nsfw=True))
        self.assertTrue(len(results) > 0)

        video = results[0]
        self.backend.fillobj(video, ('url',))
        self.assertTrue(video.url and video.url.startswith('http://'), 'URL for video "%s" not found: %s' % (video.id, video.url))

35
modules/youjizz/video.py Normal file
View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
__all__ = ['YoujizzVideo']
class YoujizzVideo(BaseVideo):
    """Video hosted on youjizz.com."""

    def __init__(self, *args, **kwargs):
        """Initialize through ``BaseVideo``, then mark the video as NSFW
        and set its extension to 'flv'.
        """
        BaseVideo.__init__(self, *args, **kwargs)
        self.nsfw = True
        self.ext = 'flv'

    @classmethod
    def id2url(cls, _id):
        """Return the URL of the video page for the identifier ``_id``."""
        page_url = 'http://www.youjizz.com/videos/%s.html' % _id
        return page_url