ehentai: fix encoding and crash (closes #655,#656)

This commit is contained in:
Roger Philibert 2011-05-08 11:02:50 +02:00 committed by Romain Bignon
commit 8b274bfa60
4 changed files with 33 additions and 12 deletions

View file

@ -19,6 +19,7 @@
from __future__ import with_statement from __future__ import with_statement
import re
from weboob.capabilities.gallery import ICapGallery from weboob.capabilities.gallery import ICapGallery
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from weboob.tools.misc import ratelimit from weboob.tools.misc import ratelimit
@ -60,11 +61,20 @@ class EHentaiBackend(BaseBackend, ICapGallery):
return self.browser.iter_gallery_images(gallery) return self.browser.iter_gallery_images(gallery)
def get_gallery(self, _id): def get_gallery(self, _id):
return EHentaiGallery(_id) if not re.match(r'(?i)/?\d+/[\dabcdef]+/?', _id):
return None
gallery = EHentaiGallery(_id)
with self.browser:
if self.browser.gallery_exists(gallery):
return gallery
else:
return None
def fill_gallery(self, gallery, fields): def fill_gallery(self, gallery, fields):
with self.browser: if not gallery.__iscomplete__():
self.browser.fill_gallery(gallery, fields) with self.browser:
self.browser.fill_gallery(gallery, fields)
def fill_image(self, image, fields): def fill_image(self, image, fields):
with self.browser: with self.browser:

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from urllib import urlencode from urllib import urlencode, quote
from .pages import IndexPage, GalleryPage, ImagePage, HomePage, LoginPage from .pages import IndexPage, GalleryPage, ImagePage, HomePage, LoginPage
from .gallery import EHentaiImage from .gallery import EHentaiImage
@ -45,11 +45,14 @@ class EHentaiBrowser(BaseBrowser):
if password: if password:
self.login(username, password) self.login(username, password)
def _gallery_url(self, gallery):
    """Build the absolute URL of a gallery page.

    The result has the form ``http://<DOMAIN>/g/<gallery.id>/``.

    :param gallery: object whose ``id`` attribute holds the gallery
                    identifier as a string
    :returns: the gallery URL as a string
    """
    # An id may carry a leading or trailing slash; drop them so the
    # path never ends up with an empty "//" segment.
    gallery_id = gallery.id.strip('/')
    return 'http://%s/g/%s/' % (self.DOMAIN, gallery_id)
def _gallery_page(self, gallery, n): def _gallery_page(self, gallery, n):
return gallery.url + ('?p=%d' % n) return gallery.url + ('?p=%d' % n)
def iter_search_results(self, pattern): def iter_search_results(self, pattern):
self.location(self.buildurl('/', f_search=pattern)) self.location(self.buildurl('/', f_search=pattern.encode('utf-8')))
assert self.is_on_page(IndexPage) assert self.is_on_page(IndexPage)
return self.page.iter_galleries() return self.page.iter_galleries()
@ -75,10 +78,16 @@ class EHentaiBrowser(BaseBrowser):
assert self.is_on_page(ImagePage) assert self.is_on_page(ImagePage)
return self.page.get_url() return self.page.get_url()
def fill_gallery(self, gallery, fields): def gallery_exists(self, gallery):
self.location(gallery.id) gallery.url = self._gallery_url(gallery)
self.location(gallery.url)
assert self.is_on_page(GalleryPage)
return self.page.gallery_exists(gallery)
def fill_gallery(self, gallery, fields):
gallery.url = self._gallery_url(gallery)
self.location(gallery.url)
assert self.is_on_page(GalleryPage) assert self.is_on_page(GalleryPage)
gallery.url = gallery.id
self.page.fill_gallery(gallery) self.page.fill_gallery(gallery)
def login(self, username, password): def login(self, username, password):

View file

@ -26,10 +26,6 @@ class EHentaiGallery(BaseGallery):
BaseGallery.__init__(self, *args, **kwargs) BaseGallery.__init__(self, *args, **kwargs)
self.nsfw = True self.nsfw = True
def iter_image(self):
self.browser.iter_gallery_images()
class EHentaiImage(BaseImage): class EHentaiImage(BaseImage):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
BaseImage.__init__(self, *args, **kwargs) BaseImage.__init__(self, *args, **kwargs)

View file

@ -48,7 +48,7 @@ class IndexPage(BasePage):
a = line.xpath('.//div[@class="it3"]/a')[-1] a = line.xpath('.//div[@class="it3"]/a')[-1]
url = a.attrib["href"] url = a.attrib["href"]
title = a.text.strip() title = a.text.strip()
yield EHentaiGallery(url, title=title) yield EHentaiGallery(re.search('(?<=/g/)\d+/[\dabcdef]+', url).group(0), title=title)
class GalleryPage(BasePage): class GalleryPage(BasePage):
def image_pages(self): def image_pages(self):
@ -60,6 +60,12 @@ class GalleryPage(BasePage):
except IndexError: except IndexError:
return None return None
def gallery_exists(self, gallery):
    """Tell whether the loaded page contains a gallery.

    The page is treated as a gallery when its document has at least
    one ``<h1>`` element.

    :param gallery: not used by this check; kept so callers can pass
                    the gallery they are looking up
    :returns: True if an ``<h1>`` element is present, False otherwise
    """
    # The XPath result is truthy only when at least one node matched.
    return bool(self.document.xpath("//h1"))
def fill_gallery(self, gallery): def fill_gallery(self, gallery):
gallery.title = self.document.xpath("//h1[@id='gn']/text()")[0] gallery.title = self.document.xpath("//h1[@id='gn']/text()")[0]
try: try: