From 8b274bfa609712adc00485b9c3c3dc7d1583561b Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Sun, 8 May 2011 11:02:50 +0200 Subject: [PATCH] ehentai: fix encoding and crash (closes #655,#656) --- weboob/backends/ehentai/backend.py | 16 +++++++++++++--- weboob/backends/ehentai/browser.py | 19 ++++++++++++++----- weboob/backends/ehentai/gallery.py | 4 ---- weboob/backends/ehentai/pages.py | 8 +++++++- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/weboob/backends/ehentai/backend.py b/weboob/backends/ehentai/backend.py index cc48127d..1722df84 100644 --- a/weboob/backends/ehentai/backend.py +++ b/weboob/backends/ehentai/backend.py @@ -19,6 +19,7 @@ from __future__ import with_statement +import re from weboob.capabilities.gallery import ICapGallery from weboob.tools.backend import BaseBackend from weboob.tools.misc import ratelimit @@ -60,11 +61,20 @@ class EHentaiBackend(BaseBackend, ICapGallery): return self.browser.iter_gallery_images(gallery) def get_gallery(self, _id): - return EHentaiGallery(_id) + if not re.match(r'(?i)/?\d+/[\dabcdef]+/?', _id): + return None + + gallery = EHentaiGallery(_id) + with self.browser: + if self.browser.gallery_exists(gallery): + return gallery + else: + return None def fill_gallery(self, gallery, fields): - with self.browser: - self.browser.fill_gallery(gallery, fields) + if not gallery.__iscomplete__(): + with self.browser: + self.browser.fill_gallery(gallery, fields) def fill_image(self, image, fields): with self.browser: diff --git a/weboob/backends/ehentai/browser.py b/weboob/backends/ehentai/browser.py index a8c206ae..991fae72 100644 --- a/weboob/backends/ehentai/browser.py +++ b/weboob/backends/ehentai/browser.py @@ -18,7 +18,7 @@ # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword -from urllib import urlencode +from urllib import urlencode, quote from .pages import IndexPage, GalleryPage, ImagePage, HomePage, LoginPage from .gallery import EHentaiImage @@ -45,11 +45,14 @@ class EHentaiBrowser(BaseBrowser): if password: self.login(username, password) + def _gallery_url(self, gallery): + return 'http://%s/g/%s/' % (self.DOMAIN, gallery.id) + def _gallery_page(self, gallery, n): return gallery.url + ('?p=%d' % n) def iter_search_results(self, pattern): - self.location(self.buildurl('/', f_search=pattern)) + self.location(self.buildurl('/', f_search=pattern.encode('utf-8'))) assert self.is_on_page(IndexPage) return self.page.iter_galleries() @@ -75,10 +78,16 @@ class EHentaiBrowser(BaseBrowser): assert self.is_on_page(ImagePage) return self.page.get_url() - def fill_gallery(self, gallery, fields): - self.location(gallery.id) + def gallery_exists(self, gallery): + gallery.url = self._gallery_url(gallery) + self.location(gallery.url) + assert self.is_on_page(GalleryPage) + return self.page.gallery_exists(gallery) + + def fill_gallery(self, gallery, fields): + gallery.url = self._gallery_url(gallery) + self.location(gallery.url) assert self.is_on_page(GalleryPage) - gallery.url = gallery.id self.page.fill_gallery(gallery) def login(self, username, password): diff --git a/weboob/backends/ehentai/gallery.py b/weboob/backends/ehentai/gallery.py index dcbbc85b..dd57b0b2 100644 --- a/weboob/backends/ehentai/gallery.py +++ b/weboob/backends/ehentai/gallery.py @@ -26,10 +26,6 @@ class EHentaiGallery(BaseGallery): BaseGallery.__init__(self, *args, **kwargs) self.nsfw = True - def iter_image(self): - self.browser.iter_gallery_images() - - class EHentaiImage(BaseImage): def __init__(self, *args, **kwargs): BaseImage.__init__(self, *args, **kwargs) diff --git a/weboob/backends/ehentai/pages.py b/weboob/backends/ehentai/pages.py index e90ebeb8..ecb98699 100644 --- 
a/weboob/backends/ehentai/pages.py +++ b/weboob/backends/ehentai/pages.py @@ -48,7 +48,7 @@ class IndexPage(BasePage): a = line.xpath('.//div[@class="it3"]/a')[-1] url = a.attrib["href"] title = a.text.strip() - yield EHentaiGallery(url, title=title) + yield EHentaiGallery(re.search('(?<=/g/)\d+/[\dabcdef]+', url).group(0), title=title) class GalleryPage(BasePage): def image_pages(self): @@ -60,6 +60,12 @@ class GalleryPage(BasePage): except IndexError: return None + def gallery_exists(self, gallery): + if self.document.xpath("//h1"): + return True + else: + return False + def fill_gallery(self, gallery): gallery.title = self.document.xpath("//h1[@id='gn']/text()")[0] try: