From 88f6fa90ca225b96875ad81d482cad85def690d2 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Wed, 14 Apr 2010 20:59:12 +0200 Subject: [PATCH 1/6] import parsers only if library used is available --- weboob/tools/parser/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/weboob/tools/parser/__init__.py b/weboob/tools/parser/__init__.py index f30cfb22..5915b6da 100644 --- a/weboob/tools/parser/__init__.py +++ b/weboob/tools/parser/__init__.py @@ -18,7 +18,17 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ -from .elementtidyparser import ElementTidyParser -from .html5libparser import Html5libParser -from .lxmlparser import LxmlHtmlParser from .standardparser import StandardParser, tostring + +try: + from .elementtidyparser import ElementTidyParser +except ImportError: + pass +try: + from .html5libparser import Html5libParser +except ImportError: + pass +try: + from .lxmlparser import LxmlHtmlParser +except ImportError: + pass From 9fb8d540badbdd94faba40ff7f05105822fea321 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Wed, 14 Apr 2010 23:50:52 +0200 Subject: [PATCH 2/6] fix instantiation of parser --- weboob/backends/transilien/browser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weboob/backends/transilien/browser.py b/weboob/backends/transilien/browser.py index e6451b16..095bd233 100644 --- a/weboob/backends/transilien/browser.py +++ b/weboob/backends/transilien/browser.py @@ -122,7 +122,7 @@ class Transilien(Browser): } def __init__(self): - Browser.__init__(self, '', parser=Parser) + Browser.__init__(self, '', parser=Parser()) def iter_station_search(self, pattern): pass From 07b54f47a062c2b10f924ce21d42e112c990ecc2 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Thu, 15 Apr 2010 01:31:51 +0200 Subject: [PATCH 3/6] better code and check youtube.com is in url --- weboob/backends/youtube/backend.py | 3 +++ weboob/backends/youtube/browser.py | 21 
++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py index 60214c07..1cad4a8e 100644 --- a/weboob/backends/youtube/backend.py +++ b/weboob/backends/youtube/backend.py @@ -38,6 +38,9 @@ class YoutubeBackend(Backend, ICapVideoProvider): def inner(self, *args, **kwargs): if not self.browser: self.browser = YoutubeBrowser() + url = args[0] + if u'youtube.com' not in url: + return None return func(self, *args, **kwargs) return inner diff --git a/weboob/backends/youtube/browser.py b/weboob/backends/youtube/browser.py index a56fd2f1..d701118d 100644 --- a/weboob/backends/youtube/browser.py +++ b/weboob/backends/youtube/browser.py @@ -26,7 +26,7 @@ from weboob.tools.parser import LxmlHtmlParser from .pages import VideoPage class YoutubeBrowser(Browser): - regex = re.compile(r'&t=([^ ,&]*)') + video_signature_regex = re.compile(r'&t=([^ ,&]*)') def __init__(self, *args, **kwargs): kwargs['parser'] = LxmlHtmlParser() @@ -38,13 +38,16 @@ class YoutubeBrowser(Browser): return self.page.title def get_video_url(self, page_url): - result = self.openurl(page_url).read() - for _signature in re.finditer(self.regex, result): - signature = _signature.group(1) - break + def find_video_signature(data): + for video_signature in re.finditer(self.video_signature_regex, data): + return video_signature.group(1) + return None + data = self.openurl(page_url).read() + video_signature = find_video_signature(data) + m = re.match(r'http://.*\.youtube\.com/watch\?v=(.+)', page_url) + if m: + video_id = m.group(1) + url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&fmt=18' % (video_id, video_signature) + return url else: return None - m = re.match(r'http://.*\.youtube\.com/watch\?v=(.+)', page_url) - video_id = m.group(1) - url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&fmt=18' % (video_id, signature) - return url From 68ca3c818065cf2e8ffc8667f16ab64432c95462 Mon Sep 17 00:00:00 
2001 From: Roger Philibert Date: Thu, 15 Apr 2010 01:32:09 +0200 Subject: [PATCH 4/6] [videoob] stop on first match --- weboob/frontends/videoob/application.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/weboob/frontends/videoob/application.py b/weboob/frontends/videoob/application.py index 31b6bf71..83858656 100644 --- a/weboob/frontends/videoob/application.py +++ b/weboob/frontends/videoob/application.py @@ -32,9 +32,15 @@ class Videoob(ConsoleApplication): @ConsoleApplication.command('Get video file URL from page URL') def command_file_url(self, url): for name, backend in self.weboob.iter_backends(ICapVideoProvider): - print backend.get_video_url(url) + video_url = backend.get_video_url(url) + if video_url: + print video_url + break @ConsoleApplication.command('Get video title from page URL') def command_title(self, url): for name, backend in self.weboob.iter_backends(ICapVideoProvider): - print backend.get_video_title(url) + video_title = backend.get_video_title(url) + if video_title: + print video_title + break From ba32b120af69bd97ab2b3651761b361a1476a479 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Thu, 15 Apr 2010 01:32:34 +0200 Subject: [PATCH 5/6] add backend youjizz --- weboob/backends/youjizz/__init__.py | 21 +++++++++++ weboob/backends/youjizz/backend.py | 57 +++++++++++++++++++++++++++++ weboob/backends/youjizz/browser.py | 49 +++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 weboob/backends/youjizz/__init__.py create mode 100644 weboob/backends/youjizz/backend.py create mode 100644 weboob/backends/youjizz/browser.py diff --git a/weboob/backends/youjizz/__init__.py b/weboob/backends/youjizz/__init__.py new file mode 100644 index 00000000..d7ba19fc --- /dev/null +++ b/weboob/backends/youjizz/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Roger Philibert + +This program is free software; you can redistribute it and/or modify +it under the terms of the 
GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +""" + +from .backend import YoujizzBackend diff --git a/weboob/backends/youjizz/backend.py b/weboob/backends/youjizz/backend.py new file mode 100644 index 00000000..1bcc632c --- /dev/null +++ b/weboob/backends/youjizz/backend.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Roger Philibert + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +""" + +from weboob.backend import Backend +from weboob.capabilities.video import ICapVideoProvider + +from .browser import YoujizzBrowser + +class YoujizzBackend(Backend, ICapVideoProvider): + NAME = 'youjizz' + MAINTAINER = 'Roger Philibert' + EMAIL = 'roger.philibert@gmail.com' + VERSION = '0.1' + DESCRIPTION = 'Youjizz videos website' + LICENSE = 'GPLv3' + + CONFIG = {} + browser = None + + def need_browser(func): + def inner(self, *args, **kwargs): + if not self.browser: + self.browser = YoujizzBrowser() + url = args[0] + if u'youjizz.com' not in url: + return None + return func(self, *args, **kwargs) + return inner + + @need_browser + def get_video_title(self, page_url): + return self.browser.get_video_title(page_url) + + @need_browser + def get_video_url(self, page_url): + return self.browser.get_video_url(page_url) + + @need_browser + def iter_page_urls(self, mozaic_url): + return self.browser.iter_page_urls(mozaic_url) diff --git a/weboob/backends/youjizz/browser.py b/weboob/backends/youjizz/browser.py new file mode 100644 index 00000000..71bf2dd0 --- /dev/null +++ b/weboob/backends/youjizz/browser.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Roger Philibert + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +""" + +from logging import error +import re + +from weboob.tools.browser import Browser +from weboob.tools.parser import LxmlHtmlParser + +class YoujizzBrowser(Browser): + video_file_regex = re.compile(r'"(http://media[^ ,]+\.flv)"') + + def __init__(self, *args, **kwargs): + kwargs['parser'] = LxmlHtmlParser() + Browser.__init__(self, *args, **kwargs) + + def iter_page_urls(self, mozaic_url): + raise NotImplementedError() + + def get_video_title(self, page_url): + raise NotImplementedError() + + def get_video_url(self, page_url): + data = self.openurl(page_url).read() + video_file_urls = re.findall(self.video_file_regex, data) + if len(video_file_urls) == 0: + return None + else: + if len(video_file_urls) > 1: + error('Many video file URL found for given URL: %s' % video_file_urls) + return video_file_urls[0] + From 15fe4238161dc983784c42e3911a6f9bf0c22ac5 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Thu, 15 Apr 2010 11:58:21 +0200 Subject: [PATCH 6/6] add copyright, author's email, url and dependencies to setup.py --- setup.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 178373b9..70e216ab 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,24 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- +""" +Copyright(C) 2010 Christophe Benz + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +""" + try: from setuptools import setup, find_packages except ImportError: @@ -15,9 +33,12 @@ setup( version='0.1', description='Weboob, web out of the browser', author='Romain Bignon', - author_email='', + author_email='romain@peerfuse.org', license='GPLv3', - url='', + url='http://www.weboob.org', packages=find_packages(exclude=['ez_setup']), scripts=[os.path.join('scripts', script) for script in os.listdir('scripts')], + install_requires=[ + 'pyyaml', + ] )