diff --git a/modules/francetelevisions/browser.py b/modules/francetelevisions/browser.py index 1978c829..232b5fc1 100644 --- a/modules/francetelevisions/browser.py +++ b/modules/francetelevisions/browser.py @@ -32,12 +32,10 @@ __all__ = ['PluzzBrowser'] class PluzzBrowser(BaseBrowser): - DOMAIN = 'pluzz.fr' - ENCODING = 'ISO-8859-1' - PAGES = {r'http://[w\.]*pluzz.fr/replay/1': IndexPage, - r'http://[w\.]*pluzz.fr/recherche.html.*': IndexPage, - r'http://[w\.]*pluzz.fr/[-\w]+/.*': IndexPage, - r'http://[w\.]*pluzz.fr/((?!recherche).+)\.html': VideoPage, + DOMAIN = 'pluzz.francetv.fr' + PAGES = {r'http://[w\.]*pluzz.francetv.fr/replay/1': IndexPage, + r'http://[w\.]*pluzz.francetv.fr/recherche.*': IndexPage, + r'http://[w\.]*pluzz.francetv.fr/videos/(.+).html': VideoPage, } @id2url(PluzzVideo.id2url) @@ -56,10 +54,10 @@ class PluzzBrowser(BaseBrowser): return video def home(self): - self.location('/replay/1') + self.search_videos('') def search_videos(self, pattern): - self.location(self.buildurl('/recherche.html', q=pattern.encode('utf-8'))) + self.location(self.buildurl('/recherche', recherche=pattern.encode('utf-8'))) assert self.is_on_page(IndexPage) return self.page.iter_videos() @@ -75,19 +73,19 @@ class PluzzBrowser(BaseBrowser): root = etree.XML(data, parser) assert root.tag == 'oeuvre' - video.title = root.findtext('titre') + video.title = unicode(root.findtext('titre')) hours, minutes, seconds = root.findtext('duree').split(':') video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds)) for vid in root.find('videos'): if vid.findtext('statut') == 'ONLINE' and vid.findtext('format') == 'wmv': - video.url = vid.findtext('url') + video.url = unicode(vid.findtext('url')) date = root.findtext('diffusions/diffusion') if date: video.date = datetime.datetime.strptime(date, '%d/%m/%Y %H:%M') - video.description = root.findtext('synopsis') + video.description = unicode(root.findtext('synopsis')) return video diff --git a/modules/francetelevisions/pages.py b/modules/francetelevisions/pages.py index 9d3db11d..68381548 100644 --- a/modules/francetelevisions/pages.py +++ b/modules/francetelevisions/pages.py @@ -19,6 +19,7 @@ import datetime import re +from dateutil.parser import parse as parse_dt from weboob.capabilities import UserError from weboob.tools.capabilities.thumbnail import Thumbnail @@ -33,35 +34,22 @@ __all__ = ['IndexPage', 'VideoPage'] class IndexPage(BasePage): def iter_videos(self): - for div in self.parser.select(self.document.getroot(), 'li.vignette'): - title = self.parser.select(div, 'h4 a', 1) + for div in self.parser.select(self.document.getroot(), 'article.rs-cell'): + title = self.parser.select(div, 'h3 a', 1) url = title.attrib['href'] - m = re.match('^http://www.pluzz.fr/([^/]+)\.html$', url) + m = re.match('^http://pluzz.francetv.fr/videos/(.+).html$', url) if not m: self.logger.debug('url %s does not match' % url) continue _id = m.group(1) video = PluzzVideo(_id) - m = re.match('^(.+) - ([0-2][0-9])h([0-5][0-9])$', title.text) - if m: - video.title = m.group(1) - hour = int(m.group(2)) - minute = int(m.group(3)) - else: - video.title = title.text - hour = 0 - minute = 0 + video.title = unicode(title.text.strip()) + video.date = parse_dt(div.find('span').attrib['data-date']) + t = map(int, div.xpath('.//a[@class="rs-genre-temps"]')[0].text.split('|')[1].strip().split(':')) + video.duration = datetime.timedelta(hours=t[0], minutes=t[1]) - m = re.match('(\d+)/(\d+)/(\d+)', self.parser.select(div, 'p.date', 1).text) - if m: - video.date = datetime.datetime(int(m.group(3)), - int(m.group(2)), - int(m.group(1)), - hour, - minute) - - url = self.parser.select(div, 'img.illustration', 1).attrib['src'] - video.thumbnail = Thumbnail(u'http://www.pluzz.fr/%s' % url) + url = self.parser.select(div, 'figure.rs-cell-image img', 1).attrib['src'] + video.thumbnail = Thumbnail(url) yield video @@ -82,7 +70,7 @@ class VideoPage(BasePage): '^%s(\d+)$' % re.escape('http://info.francetelevisions.fr/?id-video='), div.attrib['href']) if m: - return r'http://www.pluzz.fr/appftv/webservices/video/getInfosOeuvre.php?mode=zeri&id-diffusion=%s' % m.group(1) + return r'http://pluzz.francetv.fr/appftv/webservices/video/getInfosOeuvre.php?mode=zeri&id-diffusion=%s' % m.group(1) def get_id(self): return self.groups[0] diff --git a/modules/francetelevisions/video.py b/modules/francetelevisions/video.py index 52d97206..4acbd859 100644 --- a/modules/francetelevisions/video.py +++ b/modules/francetelevisions/video.py @@ -31,4 +31,4 @@ class PluzzVideo(BaseVideo): @classmethod def id2url(cls, _id): - return 'http://www.pluzz.fr/%s.html' % _id + return 'http://pluzz.francetv.fr/videos/%s.html' % _id