support changes on website

This commit is contained in:
Romain Bignon 2012-05-04 19:04:35 +02:00
commit 1be12a9dbe
3 changed files with 18 additions and 24 deletions

View file

@@ -18,7 +18,6 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime import datetime
from weboob.capabilities.base import NotAvailable from weboob.capabilities.base import NotAvailable
@@ -40,21 +39,18 @@ class IndexPage(PornPage):
thumbnail_url = a.find('img').attrib['src'] thumbnail_url = a.find('img').attrib['src']
h1 = li.find('h1') a = self.parser.select(li, './/a[@class="videoTitle"]', 1, 'xpath')
a = h1.find('a')
if a is None:
continue
url = a.attrib['href'] url = a.attrib['href']
_id = url[len('/watch/'):] _id = url[len('/watch/'):]
_id = _id[:_id.find('/')] _id = _id[:_id.find('/')]
video = YoupornVideo(int(_id)) video = YoupornVideo(int(_id))
video.title = a.text.strip() video.title = unicode(a.text.strip())
video.thumbnail = Thumbnail(thumbnail_url) video.thumbnail = Thumbnail(unicode(thumbnail_url))
hours = minutes = seconds = 0 hours = minutes = seconds = 0
div = li.cssselect('h2[class=duration]') div = li.cssselect('h2.duration')
if len(div) > 0: if len(div) > 0:
pack = [int(s) for s in div[0].text.strip().split(':')] pack = [int(s) for s in div[0].text.strip().split(':')]
if len(pack) == 3: if len(pack) == 3:
@@ -64,12 +60,10 @@ class IndexPage(PornPage):
video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds) video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
div = li.cssselect('div.stars') div = li.cssselect('div.rating h2')
if div: if div:
m = re.match('.*star-(\d).*', div[0].attrib.get('class', '')) video.rating = int(div[0].text.strip('%'))
if m: video.rating_max = 100
video.rating = int(m.group(1))
video.rating_max = 5
video.set_empty_fields(NotAvailable, ('url', 'author')) video.set_empty_fields(NotAvailable, ('url', 'author'))

View file

@@ -43,7 +43,7 @@ class VideoPage(PornPage):
return video return video
def get_url(self): def get_url(self):
download_div = self.parser.select(self.document.getroot(), 'div#tab-general-download ul li') download_div = self.parser.select(self.document.getroot(), 'ul.downloadList li')
if len(download_div) < 1: if len(download_div) < 1:
raise BrokenPageError('Unable to find file URL') raise BrokenPageError('Unable to find file URL')
@@ -53,25 +53,25 @@ class VideoPage(PornPage):
ext = m.group(1).lower() ext = m.group(1).lower()
else: else:
ext = 'flv' ext = 'flv'
return a.attrib['href'], ext return unicode(a.attrib['href']), unicode(ext)
def get_title(self): def get_title(self):
element = self.parser.select(self.document.getroot(), '#videoCanvas h1', 1) element = self.parser.select(self.document.getroot(), '#videoCanvas h1', 1)
return element.text.strip().decode('utf-8') return element.text.strip().decode('utf-8')
def set_details(self, v): def set_details(self, v):
for li in self.parser.select(self.document.getroot(), 'div#tab-general-details ul li'): for li in self.parser.select(self.document.getroot(), 'ul.spaced li'):
span = li.find('b') span = li.find('b')
name = span.text.strip() name = span.text.strip()
value = span.tail.strip() value = span.tail.strip()
if name == 'Duration:': if name == 'Duration:':
m = re.match('((\d+)hrs)?((\d+)min)?(\d+)?', value) m = re.match('((\d+)hrs)?\s*((\d+)min)?\s*((\d+)sec)?', value)
if not m: if not m:
raise BrokenPageError('Unable to parse datetime: %r' % value) raise BrokenPageError('Unable to parse datetime: %r' % value)
hours = m.group(2) or 0 hours = m.group(2) or 0
minutes = m.group(4) or 0 minutes = m.group(4) or 0
seconds = m.group(5) or 0 seconds = m.group(6) or 0
v.duration = datetime.timedelta(hours=int(hours), v.duration = datetime.timedelta(hours=int(hours),
minutes=int(minutes), minutes=int(minutes),
seconds=int(seconds)) seconds=int(seconds))
@@ -80,12 +80,12 @@ class VideoPage(PornPage):
if author is None: if author is None:
author = li.find('a') author = li.find('a')
if author is None: if author is None:
v.author = value v.author = unicode(value)
else: else:
v.author = author.text v.author = unicode(author.text)
elif name == 'Rating:': elif name == 'Rating:':
r = value.split() r = value.split()
v.rating = float(r[0]) v.rating = int(r[0].rstrip('%'))
v.rating_max = float(r[2]) v.rating_max = 100
elif name == 'Date:': elif name == 'Date:':
v.date = parse_dt(value) v.date = parse_dt(value)

View file

@@ -28,11 +28,11 @@ class YoupornVideo(BaseVideo):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
BaseVideo.__init__(self, *args, **kwargs) BaseVideo.__init__(self, *args, **kwargs)
self.nsfw = True self.nsfw = True
self.ext = 'flv' self.ext = u'flv'
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id):
if _id.isdigit(): if _id.isdigit():
return 'http://www.youporn.com/watch/%d' % int(_id) return u'http://www.youporn.com/watch/%d' % int(_id)
else: else:
return None return None