Fix e-hentai because of site changes

Also remove obsolete "nsfw" attribute
This commit is contained in:
Roger Philibert 2012-09-22 18:22:07 +02:00 committed by Noé Rubinstein
commit c938f7b063
3 changed files with 8 additions and 25 deletions

View file

@@ -49,7 +49,7 @@ class EHentaiBrowser(BaseBrowser):
return 'http://%s/g/%s/' % (self.DOMAIN, gallery.id)
def _gallery_page(self, gallery, n):
return gallery.url + ('?p=%d' % n)
return gallery.url + ('?p='+str(n))
def search_gallery(self, pattern):
self.location(self.buildurl('/', f_search=pattern.encode('utf-8')))
@@ -64,20 +64,13 @@ class EHentaiBrowser(BaseBrowser):
def iter_gallery_images(self, gallery):
self.location(gallery.url)
assert self.is_on_page(GalleryPage)
i = 0
while True:
n = self.page._next_page_link()
for n in self.page._page_numbers():
self.location(self._gallery_page(gallery, n))
assert self.is_on_page(GalleryPage)
for img in self.page.image_pages():
yield EHentaiImage(img)
if n is None:
break
i += 1
self.location(self._gallery_page(gallery, i))
assert self.is_on_page(GalleryPage)
def get_image_url(self, image):
self.location(image.id)
assert self.is_on_page(ImagePage)

View file

@@ -24,7 +24,6 @@ __all_ = ['EHentaiGallery', 'EHentaiImage']
class EHentaiGallery(BaseGallery):
def __init__(self, *args, **kwargs):
BaseGallery.__init__(self, *args, **kwargs)
self.nsfw = True
class EHentaiImage(BaseImage):
def __init__(self, *args, **kwargs):

View file

@@ -56,11 +56,8 @@ class GalleryPage(BasePage):
def image_pages(self):
return self.document.xpath('//div[@class="gdtm"]//a/attribute::href')
def _next_page_link(self):
try:
return self.document.xpath("//table[@class='ptt']//a[text()='>']")[0]
except IndexError:
return None
def _page_numbers(self):
return [n for n in self.document.xpath("(//table[@class='ptt'])[1]//td/text()") if re.match(r"\d+", n)]
def gallery_exists(self, gallery):
if self.document.xpath("//h1"):
@@ -73,8 +70,8 @@ class GalleryPage(BasePage):
try:
gallery.original_title = self.document.xpath("//h1[@id='gj']/text()")[0]
except IndexError:
gallery.orginal_title = None
description_div = self.document.xpath("//div[@id='gds']/div")[0]
gallery.original_title = None
description_div = self.document.xpath("//div[@id='gd71']")[0]
description_html = self.parser.tostring(description_div)
gallery.description = html2text(description_html)
cardinality_string = self.document.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Images:']/td[@class='gdt2']/text()")[0]
@@ -98,12 +95,6 @@ class GalleryPage(BasePage):
gallery.thumbnail = Thumbnail(unicode(thumbnail_url))
def _prev_page_link(self):
try:
return self.document.xpath("//table[@class='ptt']//a[text()='<']")[0]
except IndexError:
return None
class ImagePage(BasePage):
def get_url(self):