corrections on comments and accent stripping in all recipe backends

This commit is contained in:
Julien Veyssier 2013-04-07 14:14:53 +02:00
commit 8ec69cdcd2
6 changed files with 36 additions and 26 deletions

View file

@ -46,7 +46,7 @@ class SevenFiftyGramsBackend(BaseBackend, ICapRecipe):
return self.browser.get_recipe(id)
def iter_recipes(self, pattern):
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8'))
return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
def fill_recipe(self, recipe, fields):
if 'nb_person' in fields or 'instructions' in fields:

View file

@ -77,7 +77,7 @@ class RecipePage(BasePage):
picture_url = NotAvailable
instructions = NotAvailable
author = NotAvailable
comments = []
comments = NotAvailable
title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
@ -117,15 +117,18 @@ class RecipePage(BasePage):
if len(imgillu) > 0:
picture_url = unicode(imgillu[0].attrib.get('src', ''))
for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'):
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
if u'| Répondre' in comtxt:
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
author = None
if 'par ' in comtxt:
author = comtxt.split('par ')[-1].split('|')[0]
comtxt = comtxt.replace('par %s' % author, '')
comments.append(Comment(text=comtxt, author=author))
divcoms = self.parser.select(self.document.getroot(), 'div.comment-outer')
if len(divcoms) > 0:
comments = []
for divcom in divcoms:
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
if u'| Répondre' in comtxt:
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
author = None
if 'par ' in comtxt:
author = comtxt.split('par ')[-1].split('|')[0]
comtxt = comtxt.replace('par %s' % author, '')
comments.append(Comment(text=comtxt, author=author))
links_author = self.parser.select(self.document.getroot(), 'p.auteur a.couleur_membre')
if len(links_author) > 0:

View file

@ -48,7 +48,7 @@ class CuisineazBackend(BaseBackend, ICapRecipe):
def iter_recipes(self, pattern):
# the search form does that so the url is clean of special chars
# we go directly on search results by the url so we strip it too
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8'))
return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
def fill_recipe(self, recipe, fields):
if 'nb_person' in fields or 'instructions' in fields:

View file

@ -86,7 +86,7 @@ class RecipePage(BasePage):
ingredients = NotAvailable
picture_url = NotAvailable
instructions = NotAvailable
comments = []
comments = NotAvailable
title = unicode(self.parser.select(
self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text)
@ -125,12 +125,15 @@ class RecipePage(BasePage):
instructions += '%s: ' % inst.text
instructions += '%s\n' % inst.getnext().text
for divcom in self.parser.select(self.document.getroot(), 'div.comment'):
author = unicode(self.parser.select(
divcom, 'div.commentAuthor span', 1).text)
comtxt = unicode(self.parser.select(
divcom, 'p', 1).text_content().strip())
comments.append(Comment(author=author, text=comtxt))
divcoms = self.parser.select(self.document.getroot(), 'div.comment')
if len(divcoms) > 0:
comments = []
for divcom in divcoms:
author = unicode(self.parser.select(
divcom, 'div.commentAuthor span', 1).text)
comtxt = unicode(self.parser.select(
divcom, 'p', 1).text_content().strip())
comments.append(Comment(author=author, text=comtxt))
spans_author = self.parser.select(self.document.getroot(), 'span.author')
if len(spans_author) > 0:

View file

@ -68,7 +68,7 @@ class RecipePage(BasePage):
ingredients = NotAvailable
picture_url = NotAvailable
instructions = NotAvailable
comments = []
comments = NotAvailable
title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
@ -87,11 +87,15 @@ class RecipePage(BasePage):
imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
if len(imgillu) > 0:
picture_url = unicode(imgillu[0].attrib.get('src', ''))
for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'):
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
comments.append(Comment(author=user, rate=note, text=content))
divcoms = self.parser.select(self.document.getroot(), 'div.m_commentaire_row')
if len(divcoms) > 0:
comments = []
for divcom in divcoms:
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
comments.append(Comment(author=user, rate=note, text=content))
recipe = Recipe(id, title)
recipe.preparation_time = preparation_time

View file

@ -71,7 +71,7 @@ class RecipePage(BasePage):
ingredients = NotAvailable
picture_url = NotAvailable
instructions = NotAvailable
comments = []
comments = NotAvailable
title = unicode(self.parser.select(self.document.getroot(), 'h1 span[property$=name]', 1).text)
main = self.parser.select(self.document.getroot(), 'div[typeof$=Recipe]', 1)