[vimeo] bump to https and handle site changes

This commit is contained in:
Bezleputh 2015-04-09 16:20:00 +02:00
commit 8667a8c43b
3 changed files with 26 additions and 19 deletions

View file

@ -30,7 +30,7 @@ __all__ = ['VimeoBrowser']
class VimeoBrowser(PagesBrowser): class VimeoBrowser(PagesBrowser):
BASEURL = 'http://vimeo.com' BASEURL = 'https://vimeo.com'
search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)', search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
r'channels/(?P<channel>.*)/videos/.*?', r'channels/(?P<channel>.*)/videos/.*?',
@ -40,8 +40,8 @@ class VimeoBrowser(PagesBrowser):
categories_page = URL('categories', CategoriesPage) categories_page = URL('categories', CategoriesPage)
channels_page = URL('channels', ChannelsPage) channels_page = URL('channels', ChannelsPage)
video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage) video_url = URL(r'https://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage) video_page = URL('https://vimeo.com/(?P<_id>.*)', VideoPage)
def get_video(self, _id, video=None): def get_video(self, _id, video=None):
try: try:

View file

@ -17,6 +17,7 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo from weboob.capabilities.video import BaseVideo
from weboob.capabilities.image import BaseImage from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection from weboob.capabilities.collection import Collection
@ -24,14 +25,15 @@ from weboob.capabilities.collection import Collection
from weboob.exceptions import ParseError from weboob.exceptions import ParseError
from weboob.browser.elements import ItemElement, ListElement, method from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.pages import HTMLPage, pagination, JsonPage from weboob.browser.pages import HTMLPage, pagination, JsonPage
from weboob.browser.filters.standard import Regexp, Env, CleanText, DateTime, Duration, Field from weboob.browser.filters.standard import Regexp, Env, CleanText, DateTime, Duration, Field, Type
from weboob.browser.filters.html import Attr, Link from weboob.browser.filters.html import Attr, Link, CleanHTML, XPath
from weboob.browser.filters.json import Dict
import re import re
class VimeoDuration(Duration): class VimeoDuration(Duration):
regexp = re.compile(r'(?P<hh>\d+)H(?P<mm>\d+)M(?P<ss>\d+)S') _regexp = re.compile(r'PT(?P<hh>\d+)H(?P<mm>\d+)M(?P<ss>\d+)S')
class SearchPage(HTMLPage): class SearchPage(HTMLPage):
@ -55,21 +57,26 @@ class SearchPage(HTMLPage):
class VideoPage(HTMLPage): class VideoPage(HTMLPage):
def __init__(self, *args, **kwargs):
super(VideoPage, self).__init__(*args, **kwargs)
from weboob.tools.json import json
jsoncontent = XPath('//script[@type="application/ld+json"]/text()')(self.doc)[0]
self.doc = json.loads(jsoncontent)[0]
@method @method
class get_video(ItemElement): class get_video(ItemElement):
klass = BaseVideo klass = BaseVideo
_balise = lambda x: '//div[@itemprop="video"]/meta[@itemprop="%s"]/@content' % x
obj_id = Env('_id') obj_id = Env('_id')
obj_title = CleanText(_balise('name')) obj_title = CleanText(Dict('name'))
obj_date = DateTime(CleanText(_balise('dateCreated'))) obj_description = CleanHTML(Dict('description'))
obj_duration = VimeoDuration(CleanText(_balise('duration'))) obj_date = DateTime(Dict('datePublished'))
obj_description = CleanText(_balise('description')) obj_duration = VimeoDuration(Dict('duration'))
obj_author = CleanText('//div[@itemprop="author"]/meta[@itemprop="name"]/@content') obj_author = CleanText(Dict('author/name'))
obj_nsfw = Type(Dict('isFamilyFriendly'), type=bool)
def obj_thumbnail(self): def obj_thumbnail(self):
thumbnail = BaseImage(CleanText('//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')(self.el)) thumbnail = BaseImage(Dict('thumbnailUrl')(self.el))
thumbnail.url = thumbnail.id thumbnail.url = thumbnail.id
return thumbnail return thumbnail
@ -101,13 +108,13 @@ class VideoJsonPage(JsonPage):
class CategoriesPage(HTMLPage): class CategoriesPage(HTMLPage):
@method @method
class iter_categories(ListElement): class iter_categories(ListElement):
item_xpath = '//div[@class="col_large"]/section/ul/li/a' item_xpath = '//div[@class="category_grid"]/div/a'
class item(ItemElement): class item(ItemElement):
klass = Collection klass = Collection
obj_id = CleanText('./@href') obj_id = CleanText('./@href')
obj_title = CleanText('./h2') obj_title = CleanText('./div/div/p')
def obj_split_path(self): def obj_split_path(self):
split_path = ['vimeo-categories'] split_path = ['vimeo-categories']

View file

@ -31,7 +31,7 @@ class VimeoTest(BackendTest):
self.assertTrue(len(l) > 0) self.assertTrue(len(l) > 0)
v = l[0] v = l[0]
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_channels(self): def test_channels(self):
l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-channels']), 0, 20)) l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-channels']), 0, 20))
@ -40,7 +40,7 @@ class VimeoTest(BackendTest):
self.assertTrue(len(l1) > 0) self.assertTrue(len(l1) > 0)
v = l1[0] v = l1[0]
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_categories(self): def test_categories(self):
l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-categories']), 0, 20)) l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-categories']), 0, 20))
@ -49,4 +49,4 @@ class VimeoTest(BackendTest):
self.assertTrue(len(l1) > 0) self.assertTrue(len(l1) > 0)
v = l1[0] v = l1[0]
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url))