From abcc4067d29dea253ac6779bb4683f0b09fcf702 Mon Sep 17 00:00:00 2001
From: Florent <weboob@flo.fourcot.fr>
Date: Tue, 22 Jan 2013 14:33:35 +0100
Subject: [PATCH] Group each day's news briefs into a single thread

---
 modules/presseurop/backend.py       | 24 ++++++++++++++++++++----
 modules/presseurop/browser.py       | 22 +++++++++++++++++++---
 modules/presseurop/pages/article.py | 20 +++++++++++++++++++-
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/modules/presseurop/backend.py b/modules/presseurop/backend.py
index aeb27d3f..e927dc49 100644
--- a/modules/presseurop/backend.py
+++ b/modules/presseurop/backend.py
@@ -47,8 +47,24 @@ class NewspaperPresseuropBackend(GenericNewspaperBackend, ICapMessages):
         self.RSS_FEED = 'http://www.presseurop.eu/%s/rss.xml' % (self.config['lang'].get())
 
     def iter_threads(self):
+        daily = []
         for article in Newsfeed(self.RSS_FEED, self.RSSID).iter_entries():
-            thread = Thread(article.link)
-            thread.title = article.title
-            thread.date = article.datetime
-            yield(thread)
+            if "/news-brief/" in article.link:
+                day = self.browser.get_daily_date(article.link)
+                if day and (day not in daily):
+                    daily.append(day)
+                    id, title, date = self.browser.get_daily_infos(day)
+                    thread = Thread(id)
+                    thread.title = title
+                    thread.date = date
+                    yield(thread)
+                elif day is None:
+                    thread = Thread(article.link)
+                    thread.title = article.title
+                    thread.date = article.datetime
+                    yield(thread)
+            else:
+                thread = Thread(article.link)
+                thread.title = article.title
+                thread.date = article.datetime
+                yield(thread)
diff --git a/modules/presseurop/browser.py b/modules/presseurop/browser.py
index 93e4118d..7d4cac4a 100644
--- a/modules/presseurop/browser.py
+++ b/modules/presseurop/browser.py
@@ -18,7 +18,9 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
-from .pages.article import PresseuropPage, CartoonPage, DailyTitlesPage
+from datetime import date, datetime, time
+from .pages.article import PresseuropPage, CartoonPage, DailySinglePage,\
+                           DailyTitlesPage
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.ordereddict import OrderedDict
 
@@ -26,8 +28,8 @@ from weboob.tools.ordereddict import OrderedDict
 class NewspaperPresseuropBrowser(BaseBrowser):
     "NewspaperPresseuropBrowser class"
     PAGES = OrderedDict((
-             ("http://www.presseurop.eu/.*/todays-front-pages/.*", DailyTitlesPage),
-             ("http://www.presseurop.eu/.*/front-page/.*", DailyTitlesPage),
+             ("http://www.presseurop.eu/.*/news-brief/.*", DailySinglePage),
+             ("http://www.presseurop.eu/.*/today/.*", DailyTitlesPage),
              ("http://www.presseurop.eu/.*/cartoon/.*", CartoonPage),
              ("http://www.presseurop.eu/.*", PresseuropPage),
             ))
@@ -45,3 +47,17 @@ class NewspaperPresseuropBrowser(BaseBrowser):
         "return page article content"
         self.location(_id)
         return self.page.get_article(_id)
+
+    def get_daily_date(self, _id):
+        self.location(_id)
+        return self.page.get_daily_date()
+
+    def get_daily_infos(self, _id):
+        url = "http://www.presseurop.eu/fr/today/" + _id
+        self.location(url)
+        title = self.page.get_title()
+        article_date = date(*[int(x)
+            for x in _id.split('-')])
+        article_time = time(0, 0, 0)
+        article_datetime = datetime.combine(article_date, article_time)
+        return url, title, article_datetime
diff --git a/modules/presseurop/pages/article.py b/modules/presseurop/pages/article.py
index 6e0178e8..f2e169e3 100644
--- a/modules/presseurop/pages/article.py
+++ b/modules/presseurop/pages/article.py
@@ -50,7 +50,25 @@ class DailyTitlesPage(PresseuropPage):
         self.main_div = self.document.getroot()
         self.element_title_selector = "title"
         self.element_author_selector = "div[id=content-author]>a"
-        self.element_body_selector = "ul.articlebody"
+        self.element_body_selector = "section.main"
+
+    def get_body(self):
+        element_body = self.get_element_body()
+        try_drop_tree(self.parser, element_body, "li.button-social")
+        try_drop_tree(self.parser, element_body, "aside.articlerelated")
+        try_drop_tree(self.parser, element_body, "div.sharecount")
+        clean_relativ_urls(element_body, "http://presseurop.eu")
+
+
+
+class DailySinglePage(PresseuropPage):
+    def get_daily_date(self):
+        ul = self.document.getroot().xpath("//ul[@class='carousel-skin carousel-today']")
+        if len(ul) > 0:
+            link = ul[0].xpath('li/a')[0]
+            date = link.attrib['href'].split('/')[3]
+            return date
+        return None
 
 
 class CartoonPage(PresseuropPage):