Review notes (text before the first "diff --git" line is ignored by patch tools):
  * Line structure restored; hunk line counts recomputed for the amended content.
  * browser.py: id2url() converts digit-only string IDs with int() before the
    %d formatting, which otherwise raises TypeError.
  * pages/video.py: regexps are raw strings with the host dot escaped;
    set_details() tolerates missing span/i elements, empty text/tail, ratings
    without a space and unknown month names instead of raising.
  * The "index" lines are kept from the original patch; the post-image blob ids
    for browser.py and pages/video.py no longer match the amended content.

diff --git a/weboob/backends/youporn/backend.py b/weboob/backends/youporn/backend.py
index d0b31eec..880d6ee2 100644
--- a/weboob/backends/youporn/backend.py
+++ b/weboob/backends/youporn/backend.py
@@ -44,11 +44,15 @@ class YoupornBackend(Backend, ICapVideoProvider):
     def need_url(func):
         def inner(self, *args, **kwargs):
             url = args[0]
-            if u'youporn.com' not in url:
+            if isinstance(url, (str,unicode)) and not url.isdigit() and u'youporn.com' not in url:
                 return None
             return func(self, *args, **kwargs)
         return inner
 
+    @need_url
+    def get_video(self, _id):
+        return self.browser.get_video(_id)
+
     @need_url
     def iter_page_urls(self, mozaic_url):
         raise NotImplementedError()
diff --git a/weboob/backends/youporn/browser.py b/weboob/backends/youporn/browser.py
index 181dee3c..7c67f478 100644
--- a/weboob/backends/youporn/browser.py
+++ b/weboob/backends/youporn/browser.py
@@ -36,10 +36,25 @@ class YoupornBrowser(Browser):
         # Disallow arguments
         Browser.__init__(self)
 
-    def get_video_title(self, page_url):
-        self.location(page_url)
-        return self.page.title
+    def id2url(self, _id):
+        """Return the watch-page URL for a numeric ID, or str(_id) for anything else."""
+        if isinstance(_id, int) or isinstance(_id, (str,unicode)) and _id.isdigit():
+            # %d only accepts numbers, so a digit-only string must go through int().
+            return 'http://www.youporn.com/watch/%d' % int(_id)
+        else:
+            return str(_id)
 
-    def get_video_url(self, page_url):
-        self.location(page_url)
-        return self.page.url
+    def get_video(self, _id):
+        """Load the page designated by _id and return the page's video attribute."""
+        self.location(self.id2url(_id))
+        return self.page.video
+
+    def get_video_title(self, _id):
+        """Load the page designated by _id and return its video title."""
+        self.location(self.id2url(_id))
+        return self.page.video.title
+
+    def get_video_url(self, _id):
+        """Load the page designated by _id and return its video url."""
+        self.location(self.id2url(_id))
+        return self.page.video.url
diff --git a/weboob/backends/youporn/pages/video.py b/weboob/backends/youporn/pages/video.py
index e5afba82..48a0fded 100644
--- a/weboob/backends/youporn/pages/video.py
+++ b/weboob/backends/youporn/pages/video.py
@@ -18,19 +18,90 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
 """
 
+import re
+import datetime
+from logging import warning
+
 from .base import PornPage
+from weboob.capabilities.video import Video
 
 class VideoPage(PornPage):
+    URL_REGEXP = re.compile(r"https?://[w\.]*youporn\.com/watch/(\d+)/?.*")
+
     def loaded(self):
         if not PornPage.loaded(self):
             return
 
+        self.video = Video(self.get_id(),
+                           self.get_title(),
+                           self.get_url())
+
+        self.set_details(self.video)
+
+    def get_id(self):
+        """Return the numeric ID parsed from self.url, or 0 (after a warning) if it does not match."""
+        m = self.URL_REGEXP.match(self.url)
+        if m:
+            return int(m.group(1))
+        warning("Unable to parse ID")
+        return 0
+
+    def get_url(self):
+        """Return the href of the first link in the download box, or None when there is no box."""
         el = self.document.getroot().cssselect('div[id=download]')
         if el:
-            self.url = el[0].cssselect('a')[0].attrib['href']
-        else:
-            self.url = None
+            return el[0].cssselect('a')[0].attrib['href']
+        return None
 
+    def get_title(self):
+        """Return the stripped text following the first child of the first h1, or None without an h1."""
         el = self.document.getroot().cssselect('h1')
         if el:
-            self.title = unicode(el[0].getchildren()[0].tail).strip()
+            return unicode(el[0].getchildren()[0].tail).strip()
+        return None
+
+    DATE_REGEXP = re.compile(r"\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)")
+    MONTH2I = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+
+    def set_details(self, v):
+        """Set duration, author, rating and date on v from the li items of the details box, if present."""
+        div = self.document.getroot().cssselect('div[id=details]')
+        if not div:
+            return
+
+        for li in div[0].getiterator('li'):
+            span = li.find('span')
+            if span is None:
+                # An item without a label span carries nothing to parse.
+                continue
+            # find()-style element APIs report absent text/tail as None, not ''.
+            name = (span.text or '').strip()
+            value = (span.tail or '').strip()
+
+            if name == 'Duration:':
+                duration = 0
+                for word in value.split():
+                    if word.endswith('min'):
+                        duration += 60 * int(word[:word.find('min')])
+                    elif word.endswith('sec'):
+                        duration += int(word[:word.find('sec')])
+                v.duration = duration
+            elif name == 'Submitted:':
+                author = li.find('i')
+                if author is not None:
+                    v.author = author.text
+            elif name == 'Rating:':
+                # Keep the leading number only; split() also copes with a value that has no space.
+                words = value.split()
+                if words:
+                    v.rating = float(words[0])
+            elif name == 'Date:':
+                m = self.DATE_REGEXP.match(value)
+                if m and m.group(1) in self.MONTH2I:
+                    month = self.MONTH2I.index(m.group(1))
+                    day = int(m.group(2))
+                    hour = int(m.group(3))
+                    minute = int(m.group(4))
+                    second = int(m.group(5))
+                    year = int(m.group(6))
+                    v.date = datetime.datetime(year, month, day, hour, minute, second)