diff --git a/modules/ehentai/browser.py b/modules/ehentai/browser.py index d8b5e6e5..5fb386fd 100644 --- a/modules/ehentai/browser.py +++ b/modules/ehentai/browser.py @@ -49,7 +49,7 @@ class EHentaiBrowser(BaseBrowser): return 'http://%s/g/%s/' % (self.DOMAIN, gallery.id) def _gallery_page(self, gallery, n): - return gallery.url + ('?p=%d' % n) + return gallery.url + ('?p='+str(n)) def search_gallery(self, pattern): self.location(self.buildurl('/', f_search=pattern.encode('utf-8'))) @@ -64,20 +64,13 @@ class EHentaiBrowser(BaseBrowser): def iter_gallery_images(self, gallery): self.location(gallery.url) assert self.is_on_page(GalleryPage) - i = 0 - while True: - n = self.page._next_page_link() + for n in self.page._page_numbers(): + self.location(self._gallery_page(gallery, n)) + assert self.is_on_page(GalleryPage) for img in self.page.image_pages(): yield EHentaiImage(img) - if n is None: - break - - i += 1 - self.location(self._gallery_page(gallery, i)) - assert self.is_on_page(GalleryPage) - def get_image_url(self, image): self.location(image.id) assert self.is_on_page(ImagePage) diff --git a/modules/ehentai/gallery.py b/modules/ehentai/gallery.py index dd57b0b2..4b8ff830 100644 --- a/modules/ehentai/gallery.py +++ b/modules/ehentai/gallery.py @@ -24,7 +24,6 @@ __all_ = ['EHentaiGallery', 'EHentaiImage'] class EHentaiGallery(BaseGallery): def __init__(self, *args, **kwargs): BaseGallery.__init__(self, *args, **kwargs) - self.nsfw = True class EHentaiImage(BaseImage): def __init__(self, *args, **kwargs): diff --git a/modules/ehentai/pages.py b/modules/ehentai/pages.py index 4b627e16..cbca4d86 100644 --- a/modules/ehentai/pages.py +++ b/modules/ehentai/pages.py @@ -56,11 +56,8 @@ class GalleryPage(BasePage): def image_pages(self): return self.document.xpath('//div[@class="gdtm"]//a/attribute::href') - def _next_page_link(self): - try: - return self.document.xpath("//table[@class='ptt']//a[text()='>']")[0] - except IndexError: - return None + def _page_numbers(self): + return [n for n in self.document.xpath("(//table[@class='ptt'])[1]//td/text()") if re.match(r"\d+", n)] def gallery_exists(self, gallery): if self.document.xpath("//h1"): @@ -73,8 +70,8 @@ class GalleryPage(BasePage): try: gallery.original_title = self.document.xpath("//h1[@id='gj']/text()")[0] except IndexError: - gallery.orginal_title = None - description_div = self.document.xpath("//div[@id='gds']/div")[0] + gallery.original_title = None + description_div = self.document.xpath("//div[@id='gd71']")[0] description_html = self.parser.tostring(description_div) gallery.description = html2text(description_html) cardinality_string = self.document.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Images:']/td[@class='gdt2']/text()")[0] @@ -98,12 +95,6 @@ class GalleryPage(BasePage): gallery.thumbnail = Thumbnail(unicode(thumbnail_url)) - def _prev_page_link(self): - try: - return self.document.xpath("//table[@class='ptt']//a[text()='<']")[0] - except IndexError: - return None - class ImagePage(BasePage): def get_url(self):