Corrections to comments handling and accent stripping in all recipe backends
This commit is contained in:
parent
b6c17ed90c
commit
8ec69cdcd2
6 changed files with 36 additions and 26 deletions
|
|
@ -46,7 +46,7 @@ class SevenFiftyGramsBackend(BaseBackend, ICapRecipe):
|
|||
return self.browser.get_recipe(id)
|
||||
|
||||
def iter_recipes(self, pattern):
|
||||
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8'))
|
||||
return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
|
||||
|
||||
def fill_recipe(self, recipe, fields):
|
||||
if 'nb_person' in fields or 'instructions' in fields:
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ class RecipePage(BasePage):
|
|||
picture_url = NotAvailable
|
||||
instructions = NotAvailable
|
||||
author = NotAvailable
|
||||
comments = []
|
||||
comments = NotAvailable
|
||||
|
||||
title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
|
||||
main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
|
||||
|
|
@ -117,15 +117,18 @@ class RecipePage(BasePage):
|
|||
if len(imgillu) > 0:
|
||||
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
||||
|
||||
for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'):
|
||||
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
|
||||
if u'| Répondre' in comtxt:
|
||||
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
|
||||
author = None
|
||||
if 'par ' in comtxt:
|
||||
author = comtxt.split('par ')[-1].split('|')[0]
|
||||
comtxt = comtxt.replace('par %s' % author, '')
|
||||
comments.append(Comment(text=comtxt, author=author))
|
||||
divcoms = self.parser.select(self.document.getroot(), 'div.comment-outer')
|
||||
if len(divcoms) > 0:
|
||||
comments = []
|
||||
for divcom in divcoms:
|
||||
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
|
||||
if u'| Répondre' in comtxt:
|
||||
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
|
||||
author = None
|
||||
if 'par ' in comtxt:
|
||||
author = comtxt.split('par ')[-1].split('|')[0]
|
||||
comtxt = comtxt.replace('par %s' % author, '')
|
||||
comments.append(Comment(text=comtxt, author=author))
|
||||
|
||||
links_author = self.parser.select(self.document.getroot(), 'p.auteur a.couleur_membre')
|
||||
if len(links_author) > 0:
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class CuisineazBackend(BaseBackend, ICapRecipe):
|
|||
def iter_recipes(self, pattern):
|
||||
# the search form does that so the url is clean of special chars
|
||||
# we go directly on search results by the url so we strip it too
|
||||
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8'))
|
||||
return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
|
||||
|
||||
def fill_recipe(self, recipe, fields):
|
||||
if 'nb_person' in fields or 'instructions' in fields:
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ class RecipePage(BasePage):
|
|||
ingredients = NotAvailable
|
||||
picture_url = NotAvailable
|
||||
instructions = NotAvailable
|
||||
comments = []
|
||||
comments = NotAvailable
|
||||
|
||||
title = unicode(self.parser.select(
|
||||
self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text)
|
||||
|
|
@ -125,12 +125,15 @@ class RecipePage(BasePage):
|
|||
instructions += '%s: ' % inst.text
|
||||
instructions += '%s\n' % inst.getnext().text
|
||||
|
||||
for divcom in self.parser.select(self.document.getroot(), 'div.comment'):
|
||||
author = unicode(self.parser.select(
|
||||
divcom, 'div.commentAuthor span', 1).text)
|
||||
comtxt = unicode(self.parser.select(
|
||||
divcom, 'p', 1).text_content().strip())
|
||||
comments.append(Comment(author=author, text=comtxt))
|
||||
divcoms = self.parser.select(self.document.getroot(), 'div.comment')
|
||||
if len(divcoms) > 0:
|
||||
comments = []
|
||||
for divcom in divcoms:
|
||||
author = unicode(self.parser.select(
|
||||
divcom, 'div.commentAuthor span', 1).text)
|
||||
comtxt = unicode(self.parser.select(
|
||||
divcom, 'p', 1).text_content().strip())
|
||||
comments.append(Comment(author=author, text=comtxt))
|
||||
|
||||
spans_author = self.parser.select(self.document.getroot(), 'span.author')
|
||||
if len(spans_author) > 0:
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ class RecipePage(BasePage):
|
|||
ingredients = NotAvailable
|
||||
picture_url = NotAvailable
|
||||
instructions = NotAvailable
|
||||
comments = []
|
||||
comments = NotAvailable
|
||||
|
||||
title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
|
||||
main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
|
||||
|
|
@ -87,11 +87,15 @@ class RecipePage(BasePage):
|
|||
imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
|
||||
if len(imgillu) > 0:
|
||||
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
||||
for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'):
|
||||
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
|
||||
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
|
||||
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
|
||||
comments.append(Comment(author=user, rate=note, text=content))
|
||||
|
||||
divcoms = self.parser.select(self.document.getroot(), 'div.m_commentaire_row')
|
||||
if len(divcoms) > 0:
|
||||
comments = []
|
||||
for divcom in divcoms:
|
||||
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
|
||||
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
|
||||
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
|
||||
comments.append(Comment(author=user, rate=note, text=content))
|
||||
|
||||
recipe = Recipe(id, title)
|
||||
recipe.preparation_time = preparation_time
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ class RecipePage(BasePage):
|
|||
ingredients = NotAvailable
|
||||
picture_url = NotAvailable
|
||||
instructions = NotAvailable
|
||||
comments = []
|
||||
comments = NotAvailable
|
||||
|
||||
title = unicode(self.parser.select(self.document.getroot(), 'h1 span[property$=name]', 1).text)
|
||||
main = self.parser.select(self.document.getroot(), 'div[typeof$=Recipe]', 1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue