From 88e1275d66894196b202dd423376d378f8204dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9=20Rubinstein?= Date: Tue, 30 Aug 2011 04:09:15 +0200 Subject: [PATCH] begin comic reader factorization - doesn't work --- .../__init__.py | 2 +- .../backend.py | 48 +++++++++++-------- .../backends/genericcomicreader/mangafox.py | 33 +++++++++++++ 3 files changed, 63 insertions(+), 20 deletions(-) rename weboob/backends/{mangafox => genericcomicreader}/__init__.py (95%) rename weboob/backends/{mangafox => genericcomicreader}/backend.py (64%) create mode 100644 weboob/backends/genericcomicreader/mangafox.py diff --git a/weboob/backends/mangafox/__init__.py b/weboob/backends/genericcomicreader/__init__.py similarity index 95% rename from weboob/backends/mangafox/__init__.py rename to weboob/backends/genericcomicreader/__init__.py index ff52fc38..58e3418a 100644 --- a/weboob/backends/mangafox/__init__.py +++ b/weboob/backends/genericcomicreader/__init__.py @@ -17,6 +17,6 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from .backend import MangafoxBackend +from .mangafox import MangafoxBackend __all__ = ['MangafoxBackend'] diff --git a/weboob/backends/mangafox/backend.py b/weboob/backends/genericcomicreader/backend.py similarity index 64% rename from weboob/backends/mangafox/backend.py rename to weboob/backends/genericcomicreader/backend.py index a20bf477..4b1ae4b5 100644 --- a/weboob/backends/mangafox/backend.py +++ b/weboob/backends/genericcomicreader/backend.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2010-2011 Roger Philibert +# Copyright(C) 2010-2011 Noé Rubinstein # # This file is part of weboob. 
# @@ -25,28 +25,31 @@ from weboob.capabilities.gallery import ICapGallery, BaseGallery, BaseImage from weboob.tools.backend import BaseBackend from weboob.tools.browser import BaseBrowser, BasePage -__all__ = ['MangafoxBackend'] +__all__ = ['GenericComicReaderBackend'] class DisplayPage(BasePage): def get_page(self, gallery): - src = self.document.xpath("//img[@id='image']/attribute::src")[0] + src = self.document.xpath(self.backend.IMG_SRC_XPATH)[0] return BaseImage(src, gallery=gallery, url=src) def page_list(self): - return self.document.xpath("(//select[@onchange='change_page(this)'])[1]/option/@value") + return self.document.xpath(self.backend.PAGE_LIST_XPATH) -class MangafoxBrowser(BaseBrowser): - PAGES = { r'http://.+\.mangafox.\w+/manga/[^/]+/[^/]+/([^/]+/)?.+\.html': DisplayPage } + +class GenericComicReaderBrowser(BaseBrowser): + def __init__(self, *args, **kwargs): + self.PAGES = self.backend.PAGES + BaseBrowser.__init__(self, *args, **kwargs) def iter_gallery_images(self, gallery): self.location(gallery.url) assert self.is_on_page(DisplayPage) for p in self.page.page_list(): - self.location('%s.html' % p) + self.location(self.backend.PAGE_TO_LOCATION % p) assert self.is_on_page(DisplayPage) yield self.page.get_page(gallery) @@ -54,27 +57,33 @@ class MangafoxBrowser(BaseBrowser): if 'data' in fields: image.data = self.readurl(image.url) -class MangafoxBackend(BaseBackend, ICapGallery): - NAME = 'mangafox' - MAINTAINER = 'Roger Philibert' - EMAIL = 'roger.philibert@gmail.com' + +class GenericComicReaderBackend(BaseBackend, ICapGallery): + NAME = 'genericcomicreader' + MAINTAINER = 'Noé Rubinstein' + EMAIL = 'noe.rubinstein@gmail.com' VERSION = '0.9' - DESCRIPTION = 'Mangafox' + DESCRIPTION = 'Generic comic reader backend; subclasses implement specific sites' LICENSE = 'AGPLv3+' - BROWSER = MangafoxBrowser + BROWSER = GenericComicReaderBrowser def iter_gallery_images(self, gallery): with self.browser: return self.browser.iter_gallery_images(gallery) def 
get_gallery(self, _id): - match = re.match(r'(?:(?:.+mangafox.com/manga)?/)?([^/]+/[^/]+(?:/[^/]+)?)', _id) - if match is None: - return None + match = re.match(r'^%s$' % self.URL_REGEXP, _id) + if match: + _id = match.group(1) + else: + match = re.match(r'^%s$' % self.ID_REGEXP, _id) + if match: + _id = match.group(0) + else: + return None + - _id = match.group(1) - - gallery = BaseGallery(_id, url=('http://www.mangafox.com/manga/%s' % _id)) + gallery = BaseGallery(_id, url=(self.ID_TO_URL % _id)) with self.browser: return gallery @@ -88,3 +97,4 @@ class MangafoxBackend(BaseBackend, ICapGallery): OBJECTS = { BaseGallery: fill_gallery, BaseImage: fill_image } + diff --git a/weboob/backends/genericcomicreader/mangafox.py b/weboob/backends/genericcomicreader/mangafox.py new file mode 100644 index 00000000..8ae68ef8 --- /dev/null +++ b/weboob/backends/genericcomicreader/mangafox.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Noé Rubinstein +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
+ +from .backend import GenericComicReaderBackend, DisplayPage + +__all__ = ['MangafoxBackend'] + +class MangafoxBackend(GenericComicReaderBackend): + NAME = 'mangafox' + DESCRIPTION = 'Mangafox manga reading site' + IMG_SRC_XPATH = "//img[@id='image']/attribute::src" + PAGE_LIST_XPATH = "(//select[@onchange='change_page(this)'])[1]/option/@value" + PAGE_TO_LOCATION = "%s.html" + ID_TO_URL = 'http://www.mangafox.com/manga/%s' + ID_REGEXP = r'/?[^/]+/[^/]+(?:/[^/]+)?/?' + URL_REGEXP = r'.+mangafox.com/manga/(%s).+' % ID_REGEXP + PAGES = { r'http://.+\.mangafox.\w+/manga/[^/]+/[^/]+/([^/]+/)?.+\.html': DisplayPage }