From 8667a8c43b0eba5f5b738cf84e0d298fb44df93d Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Thu, 9 Apr 2015 16:20:00 +0200 Subject: [PATCH] [vimeo] bump to https and handle site changes --- modules/vimeo/browser.py | 6 +++--- modules/vimeo/pages.py | 33 ++++++++++++++++++++------------- modules/vimeo/test.py | 6 +++--- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/modules/vimeo/browser.py b/modules/vimeo/browser.py index b2a7f9ad..f83388c7 100644 --- a/modules/vimeo/browser.py +++ b/modules/vimeo/browser.py @@ -30,7 +30,7 @@ __all__ = ['VimeoBrowser'] class VimeoBrowser(PagesBrowser): - BASEURL = 'http://vimeo.com' + BASEURL = 'https://vimeo.com' search_page = URL(r'search/page:(?P.*)/sort:(?P.*)/format:thumbnail\?type=videos&q=(?P.*)', r'channels/(?P.*)/videos/.*?', @@ -40,8 +40,8 @@ class VimeoBrowser(PagesBrowser): categories_page = URL('categories', CategoriesPage) channels_page = URL('channels', ChannelsPage) - video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage) - video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage) + video_url = URL(r'https://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage) + video_page = URL('https://vimeo.com/(?P<_id>.*)', VideoPage) def get_video(self, _id, video=None): try: diff --git a/modules/vimeo/pages.py b/modules/vimeo/pages.py index 537e1dfb..4e64d78f 100644 --- a/modules/vimeo/pages.py +++ b/modules/vimeo/pages.py @@ -17,6 +17,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>.
+ from weboob.capabilities.video import BaseVideo from weboob.capabilities.image import BaseImage from weboob.capabilities.collection import Collection @@ -24,14 +25,15 @@ from weboob.capabilities.collection import Collection from weboob.exceptions import ParseError from weboob.browser.elements import ItemElement, ListElement, method from weboob.browser.pages import HTMLPage, pagination, JsonPage -from weboob.browser.filters.standard import Regexp, Env, CleanText, DateTime, Duration, Field -from weboob.browser.filters.html import Attr, Link +from weboob.browser.filters.standard import Regexp, Env, CleanText, DateTime, Duration, Field, Type +from weboob.browser.filters.html import Attr, Link, CleanHTML, XPath +from weboob.browser.filters.json import Dict import re class VimeoDuration(Duration): - regexp = re.compile(r'(?P\d+)H(?P\d+)M(?P\d+)S') + _regexp = re.compile(r'PT(?P\d+)H(?P\d+)M(?P\d+)S') class SearchPage(HTMLPage): @@ -55,21 +57,26 @@ class SearchPage(HTMLPage): class VideoPage(HTMLPage): + def __init__(self, *args, **kwargs): + super(VideoPage, self).__init__(*args, **kwargs) + from weboob.tools.json import json + jsoncontent = XPath('//script[@type="application/ld+json"]/text()')(self.doc)[0] + self.doc = json.loads(jsoncontent)[0] + @method class get_video(ItemElement): klass = BaseVideo - _balise = lambda x: '//div[@itemprop="video"]/meta[@itemprop="%s"]/@content' % x - obj_id = Env('_id') - obj_title = CleanText(_balise('name')) - obj_date = DateTime(CleanText(_balise('dateCreated'))) - obj_duration = VimeoDuration(CleanText(_balise('duration'))) - obj_description = CleanText(_balise('description')) - obj_author = CleanText('//div[@itemprop="author"]/meta[@itemprop="name"]/@content') + obj_title = CleanText(Dict('name')) + obj_description = CleanHTML(Dict('description')) + obj_date = DateTime(Dict('datePublished')) + obj_duration = VimeoDuration(Dict('duration')) + obj_author = CleanText(Dict('author/name')) + obj_nsfw = Type(Dict('isFamilyFriendly'), 
type=bool) def obj_thumbnail(self): - thumbnail = BaseImage(CleanText('//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')(self.el)) + thumbnail = BaseImage(Dict('thumbnailUrl')(self.el)) thumbnail.url = thumbnail.id return thumbnail @@ -101,13 +108,13 @@ class VideoJsonPage(JsonPage): class CategoriesPage(HTMLPage): @method class iter_categories(ListElement): - item_xpath = '//div[@class="col_large"]/section/ul/li/a' + item_xpath = '//div[@class="category_grid"]/div/a' class item(ItemElement): klass = Collection obj_id = CleanText('./@href') - obj_title = CleanText('./h2') + obj_title = CleanText('./div/div/p') def obj_split_path(self): split_path = ['vimeo-categories'] diff --git a/modules/vimeo/test.py b/modules/vimeo/test.py index 4e905d44..60d2bc7c 100644 --- a/modules/vimeo/test.py +++ b/modules/vimeo/test.py @@ -31,7 +31,7 @@ class VimeoTest(BackendTest): self.assertTrue(len(l) > 0) v = l[0] self.backend.fillobj(v, ('url',)) - self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) + self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) def test_channels(self): l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-channels']), 0, 20)) @@ -40,7 +40,7 @@ class VimeoTest(BackendTest): self.assertTrue(len(l1) > 0) v = l1[0] self.backend.fillobj(v, ('url',)) - self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) + self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) def test_categories(self): l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-categories']), 0, 20)) @@ -49,4 +49,4 @@ class VimeoTest(BackendTest): self.assertTrue(len(l1) > 0) v = l1[0] self.backend.fillobj(v, ('url',)) - self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) + 
self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url))