From 8ec69cdcd2a1de6acf32152ee181bd9f81a25d3b Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Sun, 7 Apr 2013 14:14:53 +0200 Subject: [PATCH] corrections on comments and accents stripping in all recipe backends --- modules/750g/backend.py | 2 +- modules/750g/pages.py | 23 +++++++++++++---------- modules/cuisineaz/backend.py | 2 +- modules/cuisineaz/pages.py | 17 ++++++++++------- modules/marmiton/pages.py | 16 ++++++++++------ modules/supertoinette/pages.py | 2 +- 6 files changed, 36 insertions(+), 26 deletions(-) diff --git a/modules/750g/backend.py b/modules/750g/backend.py index 42d8b5a2..7f580a5d 100644 --- a/modules/750g/backend.py +++ b/modules/750g/backend.py @@ -46,7 +46,7 @@ class SevenFiftyGramsBackend(BaseBackend, ICapRecipe): return self.browser.get_recipe(id) def iter_recipes(self, pattern): - return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8')) + return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8')) def fill_recipe(self, recipe, fields): if 'nb_person' in fields or 'instructions' in fields: diff --git a/modules/750g/pages.py b/modules/750g/pages.py index 91420d18..910fbb58 100644 --- a/modules/750g/pages.py +++ b/modules/750g/pages.py @@ -77,7 +77,7 @@ class RecipePage(BasePage): picture_url = NotAvailable instructions = NotAvailable author = NotAvailable - comments = [] + comments = NotAvailable title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1]) main = self.parser.select(self.document.getroot(), 'div.recette_description', 1) @@ -117,15 +117,18 @@ class RecipePage(BasePage): if len(imgillu) > 0: picture_url = unicode(imgillu[0].attrib.get('src', '')) - for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'): - comtxt = unicode(' '.join(divcom.text_content().strip().split())) - if u'| Répondre' in comtxt: - comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '') - author = None - if 'par ' in comtxt: - author = 
comtxt.split('par ')[-1].split('|')[0] - comtxt = comtxt.replace('par %s' % author, '') - comments.append(Comment(text=comtxt, author=author)) + divcoms = self.parser.select(self.document.getroot(), 'div.comment-outer') + if len(divcoms) > 0: + comments = [] + for divcom in divcoms: + comtxt = unicode(' '.join(divcom.text_content().strip().split())) + if u'| Répondre' in comtxt: + comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '') + author = None + if 'par ' in comtxt: + author = comtxt.split('par ')[-1].split('|')[0] + comtxt = comtxt.replace('par %s' % author, '') + comments.append(Comment(text=comtxt, author=author)) links_author = self.parser.select(self.document.getroot(), 'p.auteur a.couleur_membre') if len(links_author) > 0: diff --git a/modules/cuisineaz/backend.py b/modules/cuisineaz/backend.py index 1cad58cd..f178094b 100644 --- a/modules/cuisineaz/backend.py +++ b/modules/cuisineaz/backend.py @@ -48,7 +48,7 @@ class CuisineazBackend(BaseBackend, ICapRecipe): def iter_recipes(self, pattern): # the search form does that so the url is clean of special chars # we go directly on search results by the url so we strip it too - return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8')) + return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8')) def fill_recipe(self, recipe, fields): if 'nb_person' in fields or 'instructions' in fields: diff --git a/modules/cuisineaz/pages.py b/modules/cuisineaz/pages.py index a85a9c8e..9fa3af11 100644 --- a/modules/cuisineaz/pages.py +++ b/modules/cuisineaz/pages.py @@ -86,7 +86,7 @@ class RecipePage(BasePage): ingredients = NotAvailable picture_url = NotAvailable instructions = NotAvailable - comments = [] + comments = NotAvailable title = unicode(self.parser.select( self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text) @@ -125,12 +125,15 @@ class RecipePage(BasePage): instructions += '%s: ' % inst.text instructions += '%s\n' % inst.getnext().text - for divcom 
in self.parser.select(self.document.getroot(), 'div.comment'): - author = unicode(self.parser.select( - divcom, 'div.commentAuthor span', 1).text) - comtxt = unicode(self.parser.select( - divcom, 'p', 1).text_content().strip()) - comments.append(Comment(author=author, text=comtxt)) + divcoms = self.parser.select(self.document.getroot(), 'div.comment') + if len(divcoms) > 0: + comments = [] + for divcom in divcoms: + author = unicode(self.parser.select( + divcom, 'div.commentAuthor span', 1).text) + comtxt = unicode(self.parser.select( + divcom, 'p', 1).text_content().strip()) + comments.append(Comment(author=author, text=comtxt)) spans_author = self.parser.select(self.document.getroot(), 'span.author') if len(spans_author) > 0: diff --git a/modules/marmiton/pages.py b/modules/marmiton/pages.py index 18ce42c1..ddbc037b 100644 --- a/modules/marmiton/pages.py +++ b/modules/marmiton/pages.py @@ -68,7 +68,7 @@ class RecipePage(BasePage): ingredients = NotAvailable picture_url = NotAvailable instructions = NotAvailable - comments = [] + comments = NotAvailable title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip()) main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1) @@ -87,11 +87,15 @@ class RecipePage(BasePage): imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img') if len(imgillu) > 0: picture_url = unicode(imgillu[0].attrib.get('src', '')) - for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'): - note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip() - user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip() - content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip() - comments.append(Comment(author=user, rate=note, text=content)) + + divcoms = self.parser.select(self.document.getroot(), 'div.m_commentaire_row') + if len(divcoms) > 0: + comments = [] + 
for divcom in divcoms: + note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip() + user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip() + content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip() + comments.append(Comment(author=user, rate=note, text=content)) recipe = Recipe(id, title) recipe.preparation_time = preparation_time diff --git a/modules/supertoinette/pages.py b/modules/supertoinette/pages.py index ef8466a8..e4b05625 100644 --- a/modules/supertoinette/pages.py +++ b/modules/supertoinette/pages.py @@ -71,7 +71,7 @@ class RecipePage(BasePage): ingredients = NotAvailable picture_url = NotAvailable instructions = NotAvailable - comments = [] + comments = NotAvailable title = unicode(self.parser.select(self.document.getroot(), 'h1 span[property$=name]', 1).text) main = self.parser.select(self.document.getroot(), 'div[typeof$=Recipe]', 1)