From f5f6799633c21746cb23571768c475e8316e6d5a Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Wed, 14 Apr 2010 20:59:12 +0200 Subject: [PATCH 1/8] import parsers only if library used is available --- weboob/tools/parser/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/weboob/tools/parser/__init__.py b/weboob/tools/parser/__init__.py index f30cfb22..5915b6da 100644 --- a/weboob/tools/parser/__init__.py +++ b/weboob/tools/parser/__init__.py @@ -18,7 +18,17 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ -from .elementtidyparser import ElementTidyParser -from .html5libparser import Html5libParser -from .lxmlparser import LxmlHtmlParser from .standardparser import StandardParser, tostring + +try: + from .elementtidyparser import ElementTidyParser +except ImportError: + pass +try: + from .html5libparser import Html5libParser +except ImportError: + pass +try: + from .lxmlparser import LxmlHtmlParser +except ImportError: + pass From d5d360bdcf46cba5616d0bea726229092586ba12 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Wed, 14 Apr 2010 23:50:52 +0200 Subject: [PATCH 2/8] fix instantiation of parser --- weboob/backends/transilien/browser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weboob/backends/transilien/browser.py b/weboob/backends/transilien/browser.py index e6451b16..095bd233 100644 --- a/weboob/backends/transilien/browser.py +++ b/weboob/backends/transilien/browser.py @@ -122,7 +122,7 @@ class Transilien(Browser): } def __init__(self): - Browser.__init__(self, '', parser=Parser) + Browser.__init__(self, '', parser=Parser()) def iter_station_search(self, pattern): pass From 86d227fd6ee27b8ed4c20ef5f6bddfbce32b9281 Mon Sep 17 00:00:00 2001 From: Juke Date: Thu, 15 Apr 2010 12:41:45 +0200 Subject: [PATCH 3/8] loop optimisation --- weboob/frontends/travel_ui/application.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/weboob/frontends/travel_ui/application.py b/weboob/frontends/travel_ui/application.py index 6d4a795c..f73bc0f7 100644 --- a/weboob/frontends/travel_ui/application.py +++ b/weboob/frontends/travel_ui/application.py @@ -87,7 +87,7 @@ class TransilienUI(): liste = [] #liste = ConfFile('/opt/masstransit/masstransit.cfg').config.items('ListeDesGares') - for name, backend in self.weboob.iter_backends(): + for None, backend in self.weboob.iter_backends(): for station in backend.iter_station_search(""): liste.append(station) @@ -143,11 +143,10 @@ class TransilienUI(): self.treestore.clear() for name, backend in self.weboob.iter_backends(): for station in backend.iter_station_search(self.combo_source.get_current_text()): - for name, backend in self.weboob.iter_backends(): - for arrival in backend.iter_station_search(self.combo_dest.get_current_text()): - for name, backend, in self.weboob.iter_backends(): - for departure in backend.iter_station_departures(station.id, arrival.id): - self.treestore.append(None, [departure.type, departure.time, departure.arrival_station, departure.information]) + for arrival in backend.iter_station_search(self.combo_dest.get_current_text()): + for departure in backend.iter_station_departures(station.id, arrival.id): + self.treestore.append(None, [departure.type, departure.time, departure.arrival_station, departure.information]) + class Travel(BaseApplication): APPNAME = 'travel' From a1f3452c62a7d8fe6c0b9cb223ea51b52fefeb48 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Wed, 14 Apr 2010 20:59:12 +0200 Subject: [PATCH 4/8] import parsers only if library used is available --- weboob/tools/parser/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/weboob/tools/parser/__init__.py b/weboob/tools/parser/__init__.py index f30cfb22..5915b6da 100644 --- a/weboob/tools/parser/__init__.py +++ b/weboob/tools/parser/__init__.py @@ -18,7 +18,17 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 
02111-1307, USA. """ -from .elementtidyparser import ElementTidyParser -from .html5libparser import Html5libParser -from .lxmlparser import LxmlHtmlParser from .standardparser import StandardParser, tostring + +try: + from .elementtidyparser import ElementTidyParser +except ImportError: + pass +try: + from .html5libparser import Html5libParser +except ImportError: + pass +try: + from .lxmlparser import LxmlHtmlParser +except ImportError: + pass From fa5c6ba5cb958b6aa735feac68dcf2859f0f51f5 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Wed, 14 Apr 2010 23:50:52 +0200 Subject: [PATCH 5/8] fix instantiation of parser --- weboob/backends/transilien/browser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weboob/backends/transilien/browser.py b/weboob/backends/transilien/browser.py index e6451b16..095bd233 100644 --- a/weboob/backends/transilien/browser.py +++ b/weboob/backends/transilien/browser.py @@ -122,7 +122,7 @@ class Transilien(Browser): } def __init__(self): - Browser.__init__(self, '', parser=Parser) + Browser.__init__(self, '', parser=Parser()) def iter_station_search(self, pattern): pass From 1f91aa247cb3378ca97912769c68bac59fc39b28 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Thu, 15 Apr 2010 01:31:51 +0200 Subject: [PATCH 6/8] better code and check youtube.com is in url --- weboob/backends/youtube/backend.py | 3 +++ weboob/backends/youtube/browser.py | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py index 60214c07..1cad4a8e 100644 --- a/weboob/backends/youtube/backend.py +++ b/weboob/backends/youtube/backend.py @@ -38,6 +38,9 @@ class YoutubeBackend(Backend, ICapVideoProvider): def inner(self, *args, **kwargs): if not self.browser: self.browser = YoutubeBrowser() + url = args[0] + if u'youtube.com' not in url: + return None return func(self, *args, **kwargs) return inner diff --git 
a/weboob/backends/youtube/browser.py b/weboob/backends/youtube/browser.py index a56fd2f1..d701118d 100644 --- a/weboob/backends/youtube/browser.py +++ b/weboob/backends/youtube/browser.py @@ -26,7 +26,7 @@ from weboob.tools.parser import LxmlHtmlParser from .pages import VideoPage class YoutubeBrowser(Browser): - regex = re.compile(r'&t=([^ ,&]*)') + video_signature_regex = re.compile(r'&t=([^ ,&]*)') def __init__(self, *args, **kwargs): kwargs['parser'] = LxmlHtmlParser() @@ -38,13 +38,16 @@ class YoutubeBrowser(Browser): return self.page.title def get_video_url(self, page_url): - result = self.openurl(page_url).read() - for _signature in re.finditer(self.regex, result): - signature = _signature.group(1) - break + def find_video_signature(data): + for video_signature in re.finditer(self.video_signature_regex, data): + return video_signature.group(1) + return None + data = self.openurl(page_url).read() + video_signature = find_video_signature(data) + m = re.match(r'http://.*\.youtube\.com/watch\?v=(.+)', page_url) + if m: + video_id = m.group(1) + url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&fmt=18' % (video_id, video_signature) + return url else: return None - m = re.match(r'http://.*\.youtube\.com/watch\?v=(.+)', page_url) - video_id = m.group(1) - url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&fmt=18' % (video_id, signature) - return url From 5a8198c4414fbea9974f2031fcad157fbd8f4b22 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Thu, 15 Apr 2010 01:32:09 +0200 Subject: [PATCH 7/8] [videoob] stop on first match --- weboob/frontends/videoob/application.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/weboob/frontends/videoob/application.py b/weboob/frontends/videoob/application.py index 31b6bf71..83858656 100644 --- a/weboob/frontends/videoob/application.py +++ b/weboob/frontends/videoob/application.py @@ -32,9 +32,15 @@ class Videoob(ConsoleApplication): @ConsoleApplication.command('Get video file URL 
from page URL') def command_file_url(self, url): for name, backend in self.weboob.iter_backends(ICapVideoProvider): - print backend.get_video_url(url) + video_url = backend.get_video_url(url) + if video_url: + print video_url + break @ConsoleApplication.command('Get video title from page URL') def command_title(self, url): for name, backend in self.weboob.iter_backends(ICapVideoProvider): - print backend.get_video_title(url) + video_title = backend.get_video_title(url) + if video_title: + print video_title + break From 010b608348d038ae288ce5bdd1308de507154f6b Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Thu, 15 Apr 2010 01:32:34 +0200 Subject: [PATCH 8/8] add backend youjizz --- weboob/backends/youjizz/__init__.py | 21 +++++++++++ weboob/backends/youjizz/backend.py | 57 +++++++++++++++++++++++++++++ weboob/backends/youjizz/browser.py | 49 +++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 weboob/backends/youjizz/__init__.py create mode 100644 weboob/backends/youjizz/backend.py create mode 100644 weboob/backends/youjizz/browser.py diff --git a/weboob/backends/youjizz/__init__.py b/weboob/backends/youjizz/__init__.py new file mode 100644 index 00000000..d7ba19fc --- /dev/null +++ b/weboob/backends/youjizz/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Roger Philibert + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +""" + +from .backend import YoujizzBackend diff --git a/weboob/backends/youjizz/backend.py b/weboob/backends/youjizz/backend.py new file mode 100644 index 00000000..1bcc632c --- /dev/null +++ b/weboob/backends/youjizz/backend.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Roger Philibert + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +""" + +from weboob.backend import Backend +from weboob.capabilities.video import ICapVideoProvider + +from .browser import YoujizzBrowser + +class YoujizzBackend(Backend, ICapVideoProvider): + NAME = 'youjizz' + MAINTAINER = 'Roger Philibert' + EMAIL = 'roger.philibert@gmail.com' + VERSION = '0.1' + DESCRIPTION = 'Youjizz videos website' + LICENSE = 'GPLv3' + + CONFIG = {} + browser = None + + def need_browser(func): + def inner(self, *args, **kwargs): + if not self.browser: + self.browser = YoujizzBrowser() + url = args[0] + if u'youjizz.com' not in url: + return None + return func(self, *args, **kwargs) + return inner + + @need_browser + def get_video_title(self, page_url): + return self.browser.get_video_title(page_url) + + @need_browser + def get_video_url(self, page_url): + return self.browser.get_video_url(page_url) + + @need_browser + def iter_page_urls(self, mozaic_url): + return self.browser.iter_page_urls(mozaic_url) diff --git a/weboob/backends/youjizz/browser.py b/weboob/backends/youjizz/browser.py new file 
mode 100644 index 00000000..71bf2dd0 --- /dev/null +++ b/weboob/backends/youjizz/browser.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Roger Philibert + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +""" + +from logging import error +import re + +from weboob.tools.browser import Browser +from weboob.tools.parser import LxmlHtmlParser + +class YoujizzBrowser(Browser): + video_file_regex = re.compile(r'"(http://media[^ ,]+\.flv)"') + + def __init__(self, *args, **kwargs): + kwargs['parser'] = LxmlHtmlParser() + Browser.__init__(self, *args, **kwargs) + + def iter_page_urls(self, mozaic_url): + raise NotImplementedError() + + def get_video_title(self, page_url): + raise NotImplementedError() + + def get_video_url(self, page_url): + data = self.openurl(page_url).read() + video_file_urls = re.findall(self.video_file_regex, data) + if len(video_file_urls) == 0: + return None + else: + if len(video_file_urls) > 1: + error('Many video file URL found for given URL: %s' % video_file_urls) + return video_file_urls[0] +