add filter Duration

This commit is contained in:
Romain Bignon 2014-03-19 20:27:22 +01:00
commit 06d1907d3d
3 changed files with 31 additions and 52 deletions

View file

@ -18,12 +18,11 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import re
from weboob.tools.browser2 import HTMLPage
from weboob.tools.browser2.page import ListElement, method, ItemElement
from weboob.tools.browser2.filters import Filter, Link, CleanText
from weboob.tools.browser2.filters import Filter, Link, CleanText, Duration
from weboob.capabilities.image import BaseImage
from weboob.capabilities.video import BaseVideo
@ -45,23 +44,6 @@ class IndexPage(HTMLPage):
def filter(self, link):
    """Reduce a video link to the part captured between ``/videos/`` and ``.html``.

    Any text of ``link`` outside the matched portion is returned untouched,
    and a link that does not match the pattern is returned as-is.
    """
    video_path = re.compile(r'/videos/(.+)\.html')
    return video_path.sub(r'\1', link)
class Duration(Filter):
    """Filter converting a textual clock value into a ``datetime.timedelta``."""

    def filter(self, txt):
        """Parse ``txt`` as ``MM:SS`` or ``HH:MM:SS``.

        A ``;`` is treated as if it were a ``:`` separator. The literal
        ``N/A`` produces a zero-length duration.

        Raises:
            ValueError: if the text has no separator and is not ``N/A``,
                or if one of the parsed fields is not an integer.
        """
        normalized = txt.replace(';', ':')

        if ':' not in normalized:
            if normalized == 'N/A':
                return datetime.timedelta(hours=0, minutes=0, seconds=0)
            raise ValueError('Unable to parse the video duration: %s' % normalized)

        fields = normalized.split(':')
        # Read from the right-hand side: seconds are last, minutes before them.
        seconds = int(fields[-1])
        minutes = int(fields[-2])
        # Hours are only taken into account for an exact three-field value.
        hours = int(fields[-3]) if len(fields) == 3 else 0
        return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
# Declarative field definitions: each obj_* attribute is a filter chain built
# around an XPath expression.
# NOTE(review): presumably evaluated by the enclosing element class to fill
# the matching attribute of the produced object — confirm against ItemElement.

# Link filter for './/a', post-processed by the Id filter.
obj_id = Id(Link('.//a'))
# Cleaned text of the span whose id is "title1".
obj_title = CleanText('.//span[@id="title1"]')
# Cleaned text of the span nested in the "thumbtime" span, fed to Duration.
obj_duration = Duration(CleanText('.//span[@class="thumbtime"]//span'))

View file

@ -18,13 +18,11 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import re
from weboob.tools.browser2 import HTMLPage
from weboob.tools.browser2.page import method, ItemElement
from weboob.tools.browser2.filters import CleanText, Env
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser2.filters import CleanText, Env, Duration
from weboob.capabilities.video import BaseVideo
from weboob.tools.misc import to_unicode
@ -41,19 +39,7 @@ class VideoPage(HTMLPage):
# Title comes from the cleaned text of the document's <title> element.
obj_title = CleanText('//title')
# Constant fields: every object is flagged NSFW and given the 'flv' extension.
obj_nsfw = True
obj_ext = u'flv'
def obj_duration(self):
    """Extract the video runtime from the raw page markup.

    The runtime is located with a regular expression applied to
    ``self.page.response.text`` rather than through the parsed document.

    Returns:
        ``NotAvailable`` when the page reports the runtime as ``Unknown``,
        otherwise a ``datetime.timedelta`` built from a ``MM:SS`` or
        ``HH:MM:SS`` value.

    Raises:
        ValueError: if no runtime marker is found in the page, or if the
            value does not consist of two or three colon-separated integers.
    """
    # youjizz HTML is crap, we must parse it with regexps
    m = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)</div>', self.page.response.text)
    if m is None:
        raise ValueError('Unable to retrieve video duration')

    txt = m.group(1).strip()
    if txt == 'Unknown':
        return NotAvailable

    fields = [int(v) for v in to_unicode(txt).split(':')]
    if len(fields) not in (2, 3):
        raise ValueError('Unable to parse the video duration: %s' % txt)
    # Left-pad with zero so a value without an hours field still unpacks.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
# Duration filter applied to the cleaned text of the div with id "video_text".
# NOTE(review): assumes the Duration filter can find the runtime inside the
# whole div text — confirm against its implementation.
obj_duration = Duration(CleanText('//div[@id="video_text"]'))
def obj_url(self):
real_id = int(self.env['id'].split('-')[-1])