ehentai: fix encoding and crash (closes #655,#656)

This commit is contained in:
Roger Philibert 2011-05-08 11:02:50 +02:00 committed by Romain Bignon
commit 8b274bfa60
4 changed files with 33 additions and 12 deletions

View file

@ -19,6 +19,7 @@
from __future__ import with_statement
import re
from weboob.capabilities.gallery import ICapGallery
from weboob.tools.backend import BaseBackend
from weboob.tools.misc import ratelimit
@ -60,11 +61,20 @@ class EHentaiBackend(BaseBackend, ICapGallery):
return self.browser.iter_gallery_images(gallery)
def get_gallery(self, _id):
return EHentaiGallery(_id)
if not re.match(r'(?i)/?\d+/[\dabcdef]+/?', _id):
return None
gallery = EHentaiGallery(_id)
with self.browser:
if self.browser.gallery_exists(gallery):
return gallery
else:
return None
def fill_gallery(self, gallery, fields):
with self.browser:
self.browser.fill_gallery(gallery, fields)
if not gallery.__iscomplete__():
with self.browser:
self.browser.fill_gallery(gallery, fields)
def fill_image(self, image, fields):
with self.browser:

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from urllib import urlencode
from urllib import urlencode, quote
from .pages import IndexPage, GalleryPage, ImagePage, HomePage, LoginPage
from .gallery import EHentaiImage
@ -45,11 +45,14 @@ class EHentaiBrowser(BaseBrowser):
if password:
self.login(username, password)
def _gallery_url(self, gallery):
    """
    Build the absolute URL of a gallery page.

    The result is 'http://<self.DOMAIN>/g/<gallery.id>/'. Slashes around
    the id are dropped first, so an id written as '/123/abc/' does not
    produce empty path segments ('/g//123/abc//').
    """
    gallery_id = gallery.id.strip('/')
    return 'http://%s/g/%s/' % (self.DOMAIN, gallery_id)
def _gallery_page(self, gallery, n):
    """
    Return the URL of page number `n` of a gallery.

    The page is selected by appending a '?p=<n>' query string to
    gallery.url; `n` is formatted as an integer.
    """
    query = '?p=%d' % n
    return gallery.url + query
def iter_search_results(self, pattern):
self.location(self.buildurl('/', f_search=pattern))
self.location(self.buildurl('/', f_search=pattern.encode('utf-8')))
assert self.is_on_page(IndexPage)
return self.page.iter_galleries()
@ -75,10 +78,16 @@ class EHentaiBrowser(BaseBrowser):
assert self.is_on_page(ImagePage)
return self.page.get_url()
def fill_gallery(self, gallery, fields):
self.location(gallery.id)
def gallery_exists(self, gallery):
    """
    Load the gallery's page and report whether the gallery exists.

    Sets gallery.url to the URL built by self._gallery_url() as a side
    effect, navigates there, and returns whatever the loaded page's own
    gallery_exists() check returns.
    """
    gallery.url = self._gallery_url(gallery)
    self.location(gallery.url)
    # The existence check below lives on GalleryPage, so make sure that is
    # the page we actually landed on.
    assert self.is_on_page(GalleryPage)
    return self.page.gallery_exists(gallery)
def fill_gallery(self, gallery, fields):
gallery.url = self._gallery_url(gallery)
self.location(gallery.url)
assert self.is_on_page(GalleryPage)
gallery.url = gallery.id
self.page.fill_gallery(gallery)
def login(self, username, password):

View file

@ -26,10 +26,6 @@ class EHentaiGallery(BaseGallery):
BaseGallery.__init__(self, *args, **kwargs)
self.nsfw = True
def iter_image(self):
self.browser.iter_gallery_images()
class EHentaiImage(BaseImage):
def __init__(self, *args, **kwargs):
BaseImage.__init__(self, *args, **kwargs)

View file

@ -48,7 +48,7 @@ class IndexPage(BasePage):
a = line.xpath('.//div[@class="it3"]/a')[-1]
url = a.attrib["href"]
title = a.text.strip()
yield EHentaiGallery(url, title=title)
yield EHentaiGallery(re.search('(?<=/g/)\d+/[\dabcdef]+', url).group(0), title=title)
class GalleryPage(BasePage):
def image_pages(self):
@ -60,6 +60,12 @@ class GalleryPage(BasePage):
except IndexError:
return None
def gallery_exists(self, gallery):
    """
    Tell whether the loaded document is a real gallery page.

    Returns True when the document contains at least one <h1> element and
    False otherwise. The `gallery` argument is not used by the check.
    """
    return bool(self.document.xpath("//h1"))
def fill_gallery(self, gallery):
gallery.title = self.document.xpath("//h1[@id='gn']/text()")[0]
try: