support changes on website

This commit is contained in:
Romain Bignon 2012-05-04 19:04:35 +02:00
commit 1be12a9dbe
3 changed files with 18 additions and 24 deletions

View file

@ -18,7 +18,6 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime
from weboob.capabilities.base import NotAvailable
@ -40,21 +39,18 @@ class IndexPage(PornPage):
thumbnail_url = a.find('img').attrib['src']
h1 = li.find('h1')
a = h1.find('a')
if a is None:
continue
a = self.parser.select(li, './/a[@class="videoTitle"]', 1, 'xpath')
url = a.attrib['href']
_id = url[len('/watch/'):]
_id = _id[:_id.find('/')]
video = YoupornVideo(int(_id))
video.title = a.text.strip()
video.thumbnail = Thumbnail(thumbnail_url)
video.title = unicode(a.text.strip())
video.thumbnail = Thumbnail(unicode(thumbnail_url))
hours = minutes = seconds = 0
div = li.cssselect('h2[class=duration]')
div = li.cssselect('h2.duration')
if len(div) > 0:
pack = [int(s) for s in div[0].text.strip().split(':')]
if len(pack) == 3:
@ -64,12 +60,10 @@ class IndexPage(PornPage):
video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
div = li.cssselect('div.stars')
div = li.cssselect('div.rating h2')
if div:
m = re.match('.*star-(\d).*', div[0].attrib.get('class', ''))
if m:
video.rating = int(m.group(1))
video.rating_max = 5
video.rating = int(div[0].text.strip('%'))
video.rating_max = 100
video.set_empty_fields(NotAvailable, ('url', 'author'))

View file

@ -43,7 +43,7 @@ class VideoPage(PornPage):
return video
def get_url(self):
download_div = self.parser.select(self.document.getroot(), 'div#tab-general-download ul li')
download_div = self.parser.select(self.document.getroot(), 'ul.downloadList li')
if len(download_div) < 1:
raise BrokenPageError('Unable to find file URL')
@ -53,25 +53,25 @@ class VideoPage(PornPage):
ext = m.group(1).lower()
else:
ext = 'flv'
return a.attrib['href'], ext
return unicode(a.attrib['href']), unicode(ext)
def get_title(self):
element = self.parser.select(self.document.getroot(), '#videoCanvas h1', 1)
return element.text.strip().decode('utf-8')
def set_details(self, v):
for li in self.parser.select(self.document.getroot(), 'div#tab-general-details ul li'):
for li in self.parser.select(self.document.getroot(), 'ul.spaced li'):
span = li.find('b')
name = span.text.strip()
value = span.tail.strip()
if name == 'Duration:':
m = re.match('((\d+)hrs)?((\d+)min)?(\d+)?', value)
m = re.match('((\d+)hrs)?\s*((\d+)min)?\s*((\d+)sec)?', value)
if not m:
raise BrokenPageError('Unable to parse datetime: %r' % value)
hours = m.group(2) or 0
minutes = m.group(4) or 0
seconds = m.group(5) or 0
seconds = m.group(6) or 0
v.duration = datetime.timedelta(hours=int(hours),
minutes=int(minutes),
seconds=int(seconds))
@ -80,12 +80,12 @@ class VideoPage(PornPage):
if author is None:
author = li.find('a')
if author is None:
v.author = value
v.author = unicode(value)
else:
v.author = author.text
v.author = unicode(author.text)
elif name == 'Rating:':
r = value.split()
v.rating = float(r[0])
v.rating_max = float(r[2])
v.rating = int(r[0].rstrip('%'))
v.rating_max = 100
elif name == 'Date:':
v.date = parse_dt(value)

View file

@ -28,11 +28,11 @@ class YoupornVideo(BaseVideo):
def __init__(self, *args, **kwargs):
BaseVideo.__init__(self, *args, **kwargs)
self.nsfw = True
self.ext = 'flv'
self.ext = u'flv'
@classmethod
def id2url(cls, _id):
if _id.isdigit():
return 'http://www.youporn.com/watch/%d' % int(_id)
return u'http://www.youporn.com/watch/%d' % int(_id)
else:
return None