Fix parsing of cartoon pages (Site changed)

Tested on version 0.b and 0.c
This commit is contained in:
Florent 2012-04-25 13:33:45 +02:00 committed by Romain Bignon
commit e181fe4b89
2 changed files with 21 additions and 1 deletions

View file

@ -18,13 +18,14 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .pages.article import ArticlePage
from .pages.article import ArticlePage, CartoonPage
from weboob.tools.browser import BaseBrowser
class NewspaperPresseuropBrowser(BaseBrowser):
    """Browser for presseurop.eu that maps site URLs to page classes."""

    # Keys are URL regular expressions, values are the page classes that
    # handle a matching URL.
    #
    # The two patterns are kept mutually exclusive on purpose: a plain
    # "http://www.presseurop.eu/.*" catch-all would also match every
    # cartoon URL, and the iteration order of a dict literal is not a
    # reliable way to express "try the cartoon pattern first".  The
    # negative lookahead rejects any URL whose path contains "/cartoon/",
    # i.e. exactly the URLs the CartoonPage pattern accepts.
    # NOTE(review): assumes BaseBrowser matches these keys with the
    # standard `re` module -- confirm against weboob.tools.browser.
    PAGES = {
        "http://www.presseurop.eu/.*/cartoon/.*": CartoonPage,
        "http://www.presseurop.eu/(?!.*/cartoon/).*": ArticlePage,
    }

View file

@ -39,3 +39,22 @@ class ArticlePage(GenericNewsPage):
title = GenericNewsPage.get_title(self)
title = title.split('|')[0]
return title
class CartoonPage(GenericNewsPage):
    """Page object for presseurop cartoon pages."""

    def on_loaded(self):
        """Set the root element and the selectors used to locate content.

        NOTE(review): presumably invoked by the framework once the
        document has been parsed, and the selector attributes are
        consumed by GenericNewsPage -- confirm against the base class.
        """
        # Search from the document root rather than a narrower container.
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = "div.profilecartoontext>p>a"
        self.element_body_selector = "div.panel"

    def get_body(self):
        """Return the body element serialised by the page's parser."""
        return self.parser.tostring(self.get_element_body())

    def get_title(self):
        """Return the generic title truncated at the first '|' character."""
        return GenericNewsPage.get_title(self).split('|')[0]