corrections on comments and accent stripping in all recipe backends

This commit is contained in:
Julien Veyssier 2013-04-07 14:14:53 +02:00
commit 8ec69cdcd2
6 changed files with 36 additions and 26 deletions

View file

@ -46,7 +46,7 @@ class SevenFiftyGramsBackend(BaseBackend, ICapRecipe):
return self.browser.get_recipe(id) return self.browser.get_recipe(id)
def iter_recipes(self, pattern): def iter_recipes(self, pattern):
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8')) return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
def fill_recipe(self, recipe, fields): def fill_recipe(self, recipe, fields):
if 'nb_person' in fields or 'instructions' in fields: if 'nb_person' in fields or 'instructions' in fields:

View file

@ -77,7 +77,7 @@ class RecipePage(BasePage):
picture_url = NotAvailable picture_url = NotAvailable
instructions = NotAvailable instructions = NotAvailable
author = NotAvailable author = NotAvailable
comments = [] comments = NotAvailable
title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1]) title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
main = self.parser.select(self.document.getroot(), 'div.recette_description', 1) main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
@ -117,7 +117,10 @@ class RecipePage(BasePage):
if len(imgillu) > 0: if len(imgillu) > 0:
picture_url = unicode(imgillu[0].attrib.get('src', '')) picture_url = unicode(imgillu[0].attrib.get('src', ''))
for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'): divcoms = self.parser.select(self.document.getroot(), 'div.comment-outer')
if len(divcoms) > 0:
comments = []
for divcom in divcoms:
comtxt = unicode(' '.join(divcom.text_content().strip().split())) comtxt = unicode(' '.join(divcom.text_content().strip().split()))
if u'| Répondre' in comtxt: if u'| Répondre' in comtxt:
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '') comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')

View file

@ -48,7 +48,7 @@ class CuisineazBackend(BaseBackend, ICapRecipe):
def iter_recipes(self, pattern): def iter_recipes(self, pattern):
# the search form does that so the url is clean of special chars # the search form does that so the url is clean of special chars
# we go directly on search results by the url so we strip it too # we go directly on search results by the url so we strip it too
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8')) return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
def fill_recipe(self, recipe, fields): def fill_recipe(self, recipe, fields):
if 'nb_person' in fields or 'instructions' in fields: if 'nb_person' in fields or 'instructions' in fields:

View file

@ -86,7 +86,7 @@ class RecipePage(BasePage):
ingredients = NotAvailable ingredients = NotAvailable
picture_url = NotAvailable picture_url = NotAvailable
instructions = NotAvailable instructions = NotAvailable
comments = [] comments = NotAvailable
title = unicode(self.parser.select( title = unicode(self.parser.select(
self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text) self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text)
@ -125,7 +125,10 @@ class RecipePage(BasePage):
instructions += '%s: ' % inst.text instructions += '%s: ' % inst.text
instructions += '%s\n' % inst.getnext().text instructions += '%s\n' % inst.getnext().text
for divcom in self.parser.select(self.document.getroot(), 'div.comment'): divcoms = self.parser.select(self.document.getroot(), 'div.comment')
if len(divcoms) > 0:
comments = []
for divcom in divcoms:
author = unicode(self.parser.select( author = unicode(self.parser.select(
divcom, 'div.commentAuthor span', 1).text) divcom, 'div.commentAuthor span', 1).text)
comtxt = unicode(self.parser.select( comtxt = unicode(self.parser.select(

View file

@ -68,7 +68,7 @@ class RecipePage(BasePage):
ingredients = NotAvailable ingredients = NotAvailable
picture_url = NotAvailable picture_url = NotAvailable
instructions = NotAvailable instructions = NotAvailable
comments = [] comments = NotAvailable
title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip()) title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1) main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
@ -87,7 +87,11 @@ class RecipePage(BasePage):
imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img') imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
if len(imgillu) > 0: if len(imgillu) > 0:
picture_url = unicode(imgillu[0].attrib.get('src', '')) picture_url = unicode(imgillu[0].attrib.get('src', ''))
for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'):
divcoms = self.parser.select(self.document.getroot(), 'div.m_commentaire_row')
if len(divcoms) > 0:
comments = []
for divcom in divcoms:
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip() note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip() user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip() content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()

View file

@ -71,7 +71,7 @@ class RecipePage(BasePage):
ingredients = NotAvailable ingredients = NotAvailable
picture_url = NotAvailable picture_url = NotAvailable
instructions = NotAvailable instructions = NotAvailable
comments = [] comments = NotAvailable
title = unicode(self.parser.select(self.document.getroot(), 'h1 span[property$=name]', 1).text) title = unicode(self.parser.select(self.document.getroot(), 'h1 span[property$=name]', 1).text)
main = self.parser.select(self.document.getroot(), 'div[typeof$=Recipe]', 1) main = self.parser.select(self.document.getroot(), 'div[typeof$=Recipe]', 1)