support repositories to manage backends (closes #747)

2012-01-03 12:10:21 +01:00 · 2012-01-03 12:10:21 +01:00 · 14a7a1d362
commit 14a7a1d362
parent ef16a5b726
410 changed files with 1079 additions and 297 deletions
--- a/modules/youporn/pages/__init__.py
+++ b/modules/youporn/pages/__init__.py
--- a/modules/youporn/pages/base.py
+++ b/modules/youporn/pages/base.py
@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.mech import ClientForm
+# Module-level alias so the exception can be named directly in except clauses.
+ControlNotFoundError = ClientForm.ControlNotFoundError
+
+from mechanize import FormNotFoundError
+from weboob.tools.browser import BasePage
+
+
+# Names exported by ``from <this module> import *``.
+__all__ = ['PornPage']
+
+
+class PornPage(BasePage):
+    def on_loaded(self):
+        try:
+            self.browser.select_form(nr=0)
+            self.browser.submit(name='user_choice')
+            return False
+        except (ControlNotFoundError, FormNotFoundError):
+            return True
--- a/modules/youporn/pages/index.py
+++ b/modules/youporn/pages/index.py
@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import datetime
+
+from .base import PornPage
+from ..video import YoupornVideo
+
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(PornPage):
+    def iter_videos(self):
+        uls = self.document.getroot().cssselect("ul[class=clearfix]")
+        if not uls:
+            return
+
+        for ul in uls:
+            for li in ul.findall('li'):
+                a = li.find('a')
+                if a is None or a.find('img') is None:
+                    continue
+
+                thumbnail_url = a.find('img').attrib['src']
+
+                h1 = li.find('h1')
+                a = h1.find('a')
+                if a is None:
+                    continue
+
+                url = a.attrib['href']
+                _id = url[len('/watch/'):]
+                _id = _id[:_id.find('/')]
+                title = a.text.strip()
+
+                minutes = seconds = 0
+                div = li.cssselect('div[class=duration_views]')
+                if div:
+                    h2 = div[0].find('h2')
+                    minutes = int(h2.text.strip())
+                    seconds = int(h2.find('span').tail.strip())
+
+                rating = 0
+                rating_max = 0
+                div = li.cssselect('div[class=rating]')
+                if div:
+                    p = div[0].find('p')
+                    rating = float(p.text.strip())
+                    rating_max = float(p.find('span').text.strip()[2:])
+
+                yield YoupornVideo(int(_id),
+                                   title=title,
+                                   rating=rating,
+                                   rating_max=rating_max,
+                                   duration=datetime.timedelta(minutes=minutes, seconds=seconds),
+                                   thumbnail_url=thumbnail_url,
+                                   )
--- a/modules/youporn/pages/video.py
+++ b/modules/youporn/pages/video.py
@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import re
+import datetime
+
+
+
+from .base import PornPage
+from ..video import YoupornVideo
+
+
+class VideoPage(PornPage):
+    def get_video(self, video=None):
+        if not PornPage.on_loaded(self):
+            return
+        if video is None:
+            video = YoupornVideo(self.group_dict['id'])
+        video.title = self.get_title()
+        video.url, video.ext = self.get_url()
+        self.set_details(video)
+        return video
+
+    def get_url(self):
+        download_div = self.parser.select(self.document.getroot(), '#download', 1)
+        a = self.parser.select(download_div, 'a', 1)
+        m = re.match('^(\w+) - .*', a.text)
+        if m:
+            ext = m.group(1).lower()
+        else:
+            ext = 'flv'
+        return a.attrib['href'], ext
+
+    def get_title(self):
+        element = self.parser.select(self.document.getroot(), '#videoArea h1', 1)
+        return unicode(element.getchildren()[0].tail).strip()
+
+    DATE_REGEXP = re.compile("\w+ (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)")
+    MONTH2I = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+
+    def set_details(self, v):
+        details_div = self.parser.select(self.document.getroot(), '#details', 1)
+        for li in details_div.getiterator('li'):
+            span = li.find('span')
+            name = span.text.strip()
+            value = span.tail.strip()
+
+            if name == 'Duration:':
+                seconds = minutes = 0
+                for word in value.split():
+                    if word.endswith('min'):
+                        minutes = int(word[:word.find('min')])
+                    elif word.endswith('sec'):
+                        seconds = int(word[:word.find('sec')])
+                v.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
+            elif name == 'Submitted:':
+                author = li.find('i')
+                if author is None:
+                    author = li.find('a')
+                if author is None:
+                    v.author = value
+                else:
+                    v.author = author.text
+            elif name == 'Rating:':
+                r = value.split()
+                v.rating = float(r[0])
+                v.rating_max = float(r[2])
+            elif name == 'Date:':
+                m = self.DATE_REGEXP.match(value)
+                if m:
+                    month = self.MONTH2I.index(m.group(1))
+                    day = int(m.group(2))
+                    hour = int(m.group(3))
+                    minute = int(m.group(4))
+                    second = int(m.group(5))
+                    year = int(m.group(6))
+                    v.date = datetime.datetime(year, month, day, hour, minute, second)