Update the Arte module to use the Arte API

This commit is contained in:
Bezleputh 2013-09-16 20:48:58 +02:00 committed by Florent
commit a5d5011979
5 changed files with 167 additions and 155 deletions

View file

@ -18,19 +18,17 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import re
import urllib
import HTMLParser
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.browser import BasePage
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities import NotAvailable
from .video import ArteVideo, ArteLiveVideo
from .video import ArteLiveVideo
from .collection import ArteLiveCollection
__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
class ArteLiveVideoPage(BasePage):
@ -44,9 +42,9 @@ class ArteLiveVideoPage(BasePage):
urls[url.tag[-2:]] = url.text
if quality in urls:
video.url = urls[quality]
video.url = u'%s' % urls[quality]
else:
video.url = urls.popitem()[1]
video.url = u'%s' % urls.popitem()[1]
return video
@ -127,98 +125,3 @@ class ArteLivePage(BasePage):
item = ArteLiveCollection([u'live', u'%s' % _id], u'%s' % (el.find('a').text))
items.append(item)
return items
class IndexPage(BasePage):
    """Listing page exposing the videos found in its ``div[class=video]`` blocks."""

    # Compiled once instead of on every loop iteration; group 2 is the video id.
    _VIDEO_HREF_RE = re.compile(r'/(fr|de|en)/videos/(.*)\.html')

    def iter_videos(self):
        """Yield one ArteVideo for each ``div[class=video]`` element.

        For every video the title, id, rating, rating_max and thumbnail are
        set; the duration is set only when it can be parsed.  Before a video
        is yielded, ``set_empty_fields(NotAvailable, ('url',))`` is called
        on it.
        """
        for div in self.document.getroot().cssselect("div[class=video]"):
            # Title and href live on the same <h2><a> element: look it up once.
            link = div.find('h2').find('a')
            title = link.text
            m = self._VIDEO_HREF_RE.match(link.attrib['href'])
            # An href that does not match the expected layout gives an empty id.
            _id = m.group(2) if m else ''

            rating = rating_max = 0
            rates = self.parser.select(div, 'div[class=rateContainer]', 1)
            for r in rates.findall('div'):
                # .get() so a child div without a class attribute is counted
                # as "not on" instead of raising KeyError.
                if 'star-rating-on' in r.attrib.get('class', ''):
                    rating += 1
                rating_max += 1

            video = ArteVideo(_id)
            video.title = unicode(title)
            video.rating = rating
            video.rating_max = rating_max

            thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
            video.thumbnail = Thumbnail(u'http://videos.arte.tv' + thumb.attrib['src'])

            duration = self._parse_duration(div)
            if duration is not None:
                video.duration = duration

            video.set_empty_fields(NotAvailable, ('url',))
            yield video

    def _parse_duration(self, div):
        """Return the duration displayed in *div* as a timedelta, or None.

        The text of ``div.duration_thumbnail`` is expected to look like
        ``MM:SS`` or ``HH:MM:SS``.  Anything else (element not found, empty
        text, wrong number of parts, non-numeric parts) yields None so that
        the caller simply leaves the duration unset.
        """
        try:
            # NOTE(review): select(..., 1) presumably raises BrokenPageError
            # when the element is not found -- confirm against the parser.
            text = self.parser.select(div, 'div.duration_thumbnail', 1).text
        except BrokenPageError:
            return None

        # .text is None for an element without text content.
        parts = (text or '').split(':')
        if len(parts) == 2:
            parts = ['0'] + parts
        if len(parts) != 3:
            return None

        try:
            hours, minutes, seconds = [int(part) for part in parts]
        except ValueError:
            return None
        return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
class VideoPage(BasePage):
    """Page of a single video, giving access to its title and stream URL."""

    def get_video(self, video=None, lang='fr', quality='hd'):
        """Fill and return *video* with the title and URL read from this page.

        When *video* is falsy a new ArteVideo is created from the ``id``
        entry of ``self.group_dict``.  *lang* and *quality* are forwarded to
        get_url().  Remaining empty fields are set to NotAvailable.
        """
        video = video or ArteVideo(self.group_dict['id'])
        video.title = unicode(self.get_title())
        video.url = unicode(self.get_url(lang, quality))
        video.set_empty_fields(NotAvailable)
        return video

    def get_title(self):
        """Return the text of the first ``h1`` element of the page."""
        headings = self.document.getroot().cssselect('h1')
        return headings[0].text

    def get_url(self, lang, quality):
        """Return the stream URL for *lang* and *quality*.

        Two XML documents are fetched through the browser: the one named by
        the ``videorefFileUrl=`` part of the player's ``movie`` parameter,
        which maps languages to a second document, and that second document,
        which maps qualities to URLs.  When the requested language or quality
        is not listed, an arbitrary available entry is used instead.
        """
        player = self.parser.select(self.document.getroot(), 'object', 1)
        movie_param = self.parser.select(player, 'param[name=movie]', 1)
        ref_location = movie_param.attrib['value'].split('videorefFileUrl=')[-1]
        ref_doc = self.browser.get_document(self.browser.openurl(urllib.unquote(ref_location)))

        per_lang = {}
        for node in self.parser.select(ref_doc.getroot(), 'video'):
            per_lang[node.attrib['lang']] = node.attrib['ref']
        lang_location = self._pick(per_lang, lang)

        lang_doc = self.browser.get_document(self.browser.openurl(lang_location))
        urls_node = self.parser.select(lang_doc.getroot(), 'urls', 1)

        per_quality = {}
        for node in self.parser.select(urls_node, 'url'):
            per_quality[node.attrib['quality']] = node.text
        return self._pick(per_quality, quality)

    @staticmethod
    def _pick(choices, wanted):
        """Return ``choices[wanted]`` if present, else the value of an arbitrary entry.

        The fallback uses dict.popitem(), so it removes the chosen entry from
        *choices* and raises KeyError when *choices* is empty.
        """
        if wanted in choices:
            return choices[wanted]
        return choices.popitem()[1]
class ArteLivePlayerPage(BasePage):
    """Player page from which the ``eventId`` of the embedded player is read."""

    def retrieve_id(self):
        """Return the ``eventId`` found in the player URL, or None.

        The URL is the ``value`` attribute of the first ``param`` element
        under ``div.flash/div/object``.  The id is returned as a unicode
        string; None is returned when the URL carries no
        ``&eventId=<digits>&`` part.
        """
        player_url = self.document.xpath('//div[@class="flash"]/div/object/param')[0].attrib['value']
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        found = re.match(r'(.*)&eventId=(\d*)&(.*)', player_url)
        if found:
            return u'%s' % found.group(2)
        return None