[vimeo] bump to https and handle site changes

This commit is contained in:
Bezleputh 2015-04-09 16:20:00 +02:00
commit 8667a8c43b
3 changed files with 26 additions and 19 deletions

View file

@ -30,7 +30,7 @@ __all__ = ['VimeoBrowser']
class VimeoBrowser(PagesBrowser):
BASEURL = 'http://vimeo.com'
BASEURL = 'https://vimeo.com'
search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
r'channels/(?P<channel>.*)/videos/.*?',
@ -40,8 +40,8 @@ class VimeoBrowser(PagesBrowser):
categories_page = URL('categories', CategoriesPage)
channels_page = URL('channels', ChannelsPage)
video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage)
video_url = URL(r'https://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
video_page = URL('https://vimeo.com/(?P<_id>.*)', VideoPage)
def get_video(self, _id, video=None):
try:

View file

@ -17,6 +17,7 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
@ -24,14 +25,15 @@ from weboob.capabilities.collection import Collection
from weboob.exceptions import ParseError
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.pages import HTMLPage, pagination, JsonPage
from weboob.browser.filters.standard import Regexp, Env, CleanText, DateTime, Duration, Field
from weboob.browser.filters.html import Attr, Link
from weboob.browser.filters.standard import Regexp, Env, CleanText, DateTime, Duration, Field, Type
from weboob.browser.filters.html import Attr, Link, CleanHTML, XPath
from weboob.browser.filters.json import Dict
import re
class VimeoDuration(Duration):
# Subclass of the Duration filter that supplies Vimeo-specific patterns.
# Both patterns capture hours, minutes and seconds as the named groups
# hh, mm and ss, and each requires all three components to be present.
# NOTE(review): the two assignments look like the removed/added sides of a
# diff hunk; presumably only one of them exists in the real file -- confirm.
# Pattern without any prefix: "<hh>H<mm>M<ss>S".
regexp = re.compile(r'(?P<hh>\d+)H(?P<mm>\d+)M(?P<ss>\d+)S')
# Same pattern with a leading literal "PT" (e.g. "PT0H3M20S"), stored under
# the underscore-prefixed attribute name.
_regexp = re.compile(r'PT(?P<hh>\d+)H(?P<mm>\d+)M(?P<ss>\d+)S')
class SearchPage(HTMLPage):
@ -55,21 +57,26 @@ class SearchPage(HTMLPage):
class VideoPage(HTMLPage):
# Page for a single video. After the normal HTMLPage construction, the
# constructor replaces self.doc with data decoded from the JSON-LD script
# embedded in the page, which is what the Dict(...) filters below read.
def __init__(self, *args, **kwargs):
super(VideoPage, self).__init__(*args, **kwargs)
from weboob.tools.json import json
# Select the text of <script type="application/ld+json"> and keep the first
# result. NOTE(review): no guard for a page without such a script element;
# presumably the [0] fails in that case -- confirm desired error handling.
jsoncontent = XPath('//script[@type="application/ld+json"]/text()')(self.doc)[0]
# The decoded payload is indexed with [0], so it is expected to be a
# sequence whose first item holds the video metadata.
self.doc = json.loads(jsoncontent)[0]
@method
class get_video(ItemElement):
klass = BaseVideo
# NOTE(review): the _balise/XPath-based obj_* assignments and the Dict-based
# ones that reuse the same names look like the removed/added sides of a diff
# hunk. As written, the later assignment of each name wins -- confirm which
# set is meant to be live.
# Helper building the XPath of a <meta itemprop="..."> content attribute
# under the div carrying itemprop="video".
_balise = lambda x: '//div[@itemprop="video"]/meta[@itemprop="%s"]/@content' % x
# The video id comes from the '_id' environment value, not from the page.
obj_id = Env('_id')
obj_title = CleanText(_balise('name'))
obj_date = DateTime(CleanText(_balise('dateCreated')))
obj_duration = VimeoDuration(CleanText(_balise('duration')))
obj_description = CleanText(_balise('description'))
obj_author = CleanText('//div[@itemprop="author"]/meta[@itemprop="name"]/@content')
# Dict-based variants: each value is looked up by key in the JSON-LD data.
obj_title = CleanText(Dict('name'))
obj_description = CleanHTML(Dict('description'))
obj_date = DateTime(Dict('datePublished'))
obj_duration = VimeoDuration(Dict('duration'))
# 'author/name' is a slash-separated path; presumably the 'name' key of the
# nested 'author' object -- confirm against the Dict filter.
obj_author = CleanText(Dict('author/name'))
# NOTE(review): nsfw is filled straight from 'isFamilyFriendly' with no
# negation, so a family-friendly video would presumably be flagged NSFW.
# Confirm the intended polarity, and that the JSON value is a real boolean
# (bool() of any non-empty string, including "false", is True).
obj_nsfw = Type(Dict('isFamilyFriendly'), type=bool)
def obj_thumbnail(self):
# The first assignment is immediately overwritten by the second; only the
# Dict('thumbnailUrl') value reaches the returned object.
thumbnail = BaseImage(CleanText('//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')(self.el))
thumbnail = BaseImage(Dict('thumbnailUrl')(self.el))
# Copy the image's id into its url; presumably the constructor argument
# above is stored as the id, making id and url the same thumbnail address.
thumbnail.url = thumbnail.id
return thumbnail
@ -101,13 +108,13 @@ class VideoJsonPage(JsonPage):
class CategoriesPage(HTMLPage):
@method
class iter_categories(ListElement):
item_xpath = '//div[@class="col_large"]/section/ul/li/a'
item_xpath = '//div[@class="category_grid"]/div/a'
class item(ItemElement):
klass = Collection
obj_id = CleanText('./@href')
obj_title = CleanText('./h2')
obj_title = CleanText('./div/div/p')
def obj_split_path(self):
split_path = ['vimeo-categories']

View file

@ -31,7 +31,7 @@ class VimeoTest(BackendTest):
self.assertTrue(len(l) > 0)
v = l[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_channels(self):
l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-channels']), 0, 20))
@ -40,7 +40,7 @@ class VimeoTest(BackendTest):
self.assertTrue(len(l1) > 0)
v = l1[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_categories(self):
l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-categories']), 0, 20))
@ -49,4 +49,4 @@ class VimeoTest(BackendTest):
self.assertTrue(len(l1) > 0)
v = l1[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
self.assertTrue(v.url and v.url.startswith('https://'), 'URL for video "%s" not found: %s' % (v.id, v.url))