Fix parsing of cartoon pages (Site changed)
Tested on versions 0.b and 0.c
This commit is contained in:
parent
92d5e9ea0f
commit
e181fe4b89
2 changed files with 21 additions and 1 deletion
|
|
@ -18,13 +18,14 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from .pages.article import ArticlePage
|
from .pages.article import ArticlePage, CartoonPage
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
|
||||||
|
|
||||||
class NewspaperPresseuropBrowser(BaseBrowser):
|
class NewspaperPresseuropBrowser(BaseBrowser):
|
||||||
"NewspaperPresseuropBrowser class"
|
"NewspaperPresseuropBrowser class"
|
||||||
PAGES = {
|
PAGES = {
|
||||||
|
"http://www.presseurop.eu/.*/cartoon/.*": CartoonPage,
|
||||||
"http://www.presseurop.eu/.*": ArticlePage,
|
"http://www.presseurop.eu/.*": ArticlePage,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,3 +39,22 @@ class ArticlePage(GenericNewsPage):
|
||||||
title = GenericNewsPage.get_title(self)
|
title = GenericNewsPage.get_title(self)
|
||||||
title = title.split('|')[0]
|
title = title.split('|')[0]
|
||||||
return title
|
return title
|
||||||
|
|
||||||
|
|
||||||
|
class CartoonPage(GenericNewsPage):
    """Page class matched against presseurop cartoon URLs."""

    def on_loaded(self):
        """Record the document root and the selectors for this page type.

        The three ``element_*_selector`` attributes are presumably read by
        GenericNewsPage when it looks up the title, author and body
        elements -- confirm against the base class.
        """
        # Resolve the root first, exactly as before, so a failure here
        # happens before any selector attribute is assigned.
        self.main_div = self.document.getroot()
        self.element_body_selector = "div.panel"
        self.element_author_selector = "div.profilecartoontext>p>a"
        self.element_title_selector = "title"

    def get_body(self):
        """Return the body element serialized with ``self.parser.tostring``."""
        body_node = self.get_element_body()
        return self.parser.tostring(body_node)

    def get_title(self):
        """Return the base-class title cut at the first ``|`` character."""
        # Keep only the text before the first pipe; a title without any
        # pipe is returned unchanged.
        return GenericNewsPage.get_title(self).split('|')[0]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue