- parse video XML with lxml instead of regexes
- add a prefix to IDs to know what kind of object it is
This commit is contained in:
Romain Bignon 2013-05-24 18:19:51 +02:00
commit 4ee284b2ab
5 changed files with 38 additions and 24 deletions

View file

@ -46,8 +46,21 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
def create_default_browser(self): def create_default_browser(self):
return self.create_browser(lang=self.config['lang'].get(), quality=self.config['quality'].get()) return self.create_browser(lang=self.config['lang'].get(), quality=self.config['quality'].get())
def split_id(self, _id):
try:
site, _id = _id.split('.', 1)
except ValueError:
site = 'videos'
return site, _id
def get_video(self, _id): def get_video(self, _id):
with self.browser: with self.browser:
site, _id = self.split_id(_id)
if site == 'live':
return self.browser.get_live_video(_id)
else:
return self.browser.get_video(_id) return self.browser.get_video(_id)
def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
@ -58,10 +71,12 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
if fields != ['thumbnail']: if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields # if we don't want only the thumbnail, we probably want also every fields
with self.browser: with self.browser:
site, _id = self.split_id(video.id)
if isinstance(video,ArteVideo): if isinstance(video,ArteVideo):
video = self.browser.get_video(ArteVideo.id2url(video.id), video) video = self.browser.get_video(_id, video)
if isinstance(video,ArteLiveVideo): if isinstance(video,ArteLiveVideo):
video = self.browser.get_live_video(ArteLiveVideo.id2url(video.id), video) video = self.browser.get_live_video(_id, video)
if 'thumbnail' in fields and video and video.thumbnail: if 'thumbnail' in fields and video and video.thumbnail:
with self.browser: with self.browser:
video.thumbnail.data = self.browser.readurl(video.thumbnail.url) video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
@ -84,7 +99,7 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
yield categorie yield categorie
if collection.path_level == 2: if collection.path_level == 2:
if collection.split_path[0] == u'live': if collection.split_path[0] == u'live':
for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename)): for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.lang)):
yield video yield video
def validate_collection(self, objs, collection): def validate_collection(self, objs, collection):

View file

@ -22,7 +22,7 @@ from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url from weboob.tools.browser.decorators import id2url
from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
from .video import ArteVideo from .video import ArteVideo, ArteLiveVideo
__all__ = ['ArteBrowser'] __all__ = ['ArteBrowser']
@ -42,22 +42,23 @@ class ArteBrowser(BaseBrowser):
SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'} SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'}
def __init__(self, lang, quality, *args, **kwargs): def __init__(self, lang, quality, *args, **kwargs):
BaseBrowser.__init__(self, *args, **kwargs)
self.lang = lang self.lang = lang
self.quality = quality self.quality = quality
BaseBrowser.__init__(self, *args, **kwargs)
@id2url(ArteVideo.id2url) @id2url(ArteVideo.id2url)
def get_video(self, url, video=None): def get_video(self, url, video=None):
self.location(url) self.location(url)
return self.page.get_video(video, self.lang, self.quality) return self.page.get_video(video, self.lang, self.quality)
@id2url(ArteLiveVideo.id2url)
def get_live_video(self, url, video=None): def get_live_video(self, url, video=None):
self.location(url) self.location(url)
assert self.is_on_page(ArteLiveVideoPage) assert self.is_on_page(ArteLiveVideoPage)
return self.page.get_video(url, video, self.lang, self.quality) return self.page.get_video(video, self.lang, self.quality)
def home(self): def home(self):
self.location('http://videos.arte.tv/fr/videos/toutesLesVideos') self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang)
def search_videos(self, pattern): def search_videos(self, pattern):
self.location(self.buildurl('/%s/do_search/videos/%s' % (self.lang, self.SEARCH_LANG[self.lang]), q=pattern.encode('utf-8'))) self.location(self.buildurl('/%s/do_search/videos/%s' % (self.lang, self.SEARCH_LANG[self.lang]), q=pattern.encode('utf-8')))

View file

@ -23,5 +23,5 @@ __all__ = ['ArteLiveCollection']
class ArteLiveCollection(Collection): class ArteLiveCollection(Collection):
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id, lang):
return 'http://liveweb.arte.tv/fr/cat/%s/' % _id return 'http://liveweb.arte.tv/%s/cat/%s/' % (lang, _id)

View file

@ -33,24 +33,19 @@ from .collection import ArteLiveCollection
__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage'] __all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
class ArteLiveVideoPage(BasePage): class ArteLiveVideoPage(BasePage):
def get_video(self, url, video=None, lang='fr', quality='hd'): def get_video(self, video=None, lang='fr', quality='hd'):
if not video: if not video:
video = ArteVideo(self.group_dict['id']) video = ArteVideo(self.group_dict['id'])
HD = re.compile("(?<=<urlHd>)(.*)(?=</urlHd>)", re.DOTALL)
SD = re.compile("(?<=<urlSd>)(.*)(?=</urlSd>)", re.DOTALL)
page = self.browser.readurl(url)
urls = {} urls = {}
try: for url in self.document.xpath('//video')[0].getchildren():
urls['hd'] = u'%s' %HD.search(page).group(0).split('?')[0] if url.tag.startswith('url'):
except AttributeError: urls[url.tag[-2:]] = url.text
urls['hd'] = None
try: if quality in urls:
urls['sd'] = u'%s' %SD.search(page).group(0).split('?')[0]
except AttributeError:
urls['sd'] = None
video.url = urls[quality] video.url = urls[quality]
else:
video.url = urls.popitem()[1]
return video return video
class ArteLiveCategorieVideoPage(BasePage): class ArteLiveCategorieVideoPage(BasePage):

View file

@ -31,6 +31,9 @@ class ArteVideo(BaseVideo):
class ArteLiveVideo(BaseVideo): class ArteLiveVideo(BaseVideo):
def __init__(self, _id, *args, **kwargs):
BaseVideo.__init__(self, 'live.%s' % _id, *args, **kwargs)
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id):
return 'http://arte.vo.llnwd.net/o21/liveweb/events/event-%s.xml' % _id return 'http://arte.vo.llnwd.net/o21/liveweb/events/event-%s.xml' % _id