corrections to comments and accent stripping in all recipe backends
This commit is contained in:
parent
b6c17ed90c
commit
8ec69cdcd2
6 changed files with 36 additions and 26 deletions
|
|
@ -46,7 +46,7 @@ class SevenFiftyGramsBackend(BaseBackend, ICapRecipe):
|
||||||
return self.browser.get_recipe(id)
|
return self.browser.get_recipe(id)
|
||||||
|
|
||||||
def iter_recipes(self, pattern):
|
def iter_recipes(self, pattern):
|
||||||
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8'))
|
return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
|
||||||
|
|
||||||
def fill_recipe(self, recipe, fields):
|
def fill_recipe(self, recipe, fields):
|
||||||
if 'nb_person' in fields or 'instructions' in fields:
|
if 'nb_person' in fields or 'instructions' in fields:
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ class RecipePage(BasePage):
|
||||||
picture_url = NotAvailable
|
picture_url = NotAvailable
|
||||||
instructions = NotAvailable
|
instructions = NotAvailable
|
||||||
author = NotAvailable
|
author = NotAvailable
|
||||||
comments = []
|
comments = NotAvailable
|
||||||
|
|
||||||
title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
|
title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
|
||||||
main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
|
main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
|
||||||
|
|
@ -117,7 +117,10 @@ class RecipePage(BasePage):
|
||||||
if len(imgillu) > 0:
|
if len(imgillu) > 0:
|
||||||
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
||||||
|
|
||||||
for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'):
|
divcoms = self.parser.select(self.document.getroot(), 'div.comment-outer')
|
||||||
|
if len(divcoms) > 0:
|
||||||
|
comments = []
|
||||||
|
for divcom in divcoms:
|
||||||
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
|
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
|
||||||
if u'| Répondre' in comtxt:
|
if u'| Répondre' in comtxt:
|
||||||
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
|
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ class CuisineazBackend(BaseBackend, ICapRecipe):
|
||||||
def iter_recipes(self, pattern):
|
def iter_recipes(self, pattern):
|
||||||
# the search form does that so the url is clean of special chars
|
# the search form does that so the url is clean of special chars
|
||||||
# we go directly on search results by the url so we strip it too
|
# we go directly on search results by the url so we strip it too
|
||||||
return self.browser.iter_recipes(strip_accents(pattern).encode('utf-8'))
|
return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
|
||||||
|
|
||||||
def fill_recipe(self, recipe, fields):
|
def fill_recipe(self, recipe, fields):
|
||||||
if 'nb_person' in fields or 'instructions' in fields:
|
if 'nb_person' in fields or 'instructions' in fields:
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,7 @@ class RecipePage(BasePage):
|
||||||
ingredients = NotAvailable
|
ingredients = NotAvailable
|
||||||
picture_url = NotAvailable
|
picture_url = NotAvailable
|
||||||
instructions = NotAvailable
|
instructions = NotAvailable
|
||||||
comments = []
|
comments = NotAvailable
|
||||||
|
|
||||||
title = unicode(self.parser.select(
|
title = unicode(self.parser.select(
|
||||||
self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text)
|
self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text)
|
||||||
|
|
@ -125,7 +125,10 @@ class RecipePage(BasePage):
|
||||||
instructions += '%s: ' % inst.text
|
instructions += '%s: ' % inst.text
|
||||||
instructions += '%s\n' % inst.getnext().text
|
instructions += '%s\n' % inst.getnext().text
|
||||||
|
|
||||||
for divcom in self.parser.select(self.document.getroot(), 'div.comment'):
|
divcoms = self.parser.select(self.document.getroot(), 'div.comment')
|
||||||
|
if len(divcoms) > 0:
|
||||||
|
comments = []
|
||||||
|
for divcom in divcoms:
|
||||||
author = unicode(self.parser.select(
|
author = unicode(self.parser.select(
|
||||||
divcom, 'div.commentAuthor span', 1).text)
|
divcom, 'div.commentAuthor span', 1).text)
|
||||||
comtxt = unicode(self.parser.select(
|
comtxt = unicode(self.parser.select(
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ class RecipePage(BasePage):
|
||||||
ingredients = NotAvailable
|
ingredients = NotAvailable
|
||||||
picture_url = NotAvailable
|
picture_url = NotAvailable
|
||||||
instructions = NotAvailable
|
instructions = NotAvailable
|
||||||
comments = []
|
comments = NotAvailable
|
||||||
|
|
||||||
title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
|
title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
|
||||||
main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
|
main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
|
||||||
|
|
@ -87,7 +87,11 @@ class RecipePage(BasePage):
|
||||||
imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
|
imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
|
||||||
if len(imgillu) > 0:
|
if len(imgillu) > 0:
|
||||||
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
||||||
for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'):
|
|
||||||
|
divcoms = self.parser.select(self.document.getroot(), 'div.m_commentaire_row')
|
||||||
|
if len(divcoms) > 0:
|
||||||
|
comments = []
|
||||||
|
for divcom in divcoms:
|
||||||
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
|
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
|
||||||
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
|
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
|
||||||
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
|
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,7 @@ class RecipePage(BasePage):
|
||||||
ingredients = NotAvailable
|
ingredients = NotAvailable
|
||||||
picture_url = NotAvailable
|
picture_url = NotAvailable
|
||||||
instructions = NotAvailable
|
instructions = NotAvailable
|
||||||
comments = []
|
comments = NotAvailable
|
||||||
|
|
||||||
title = unicode(self.parser.select(self.document.getroot(), 'h1 span[property$=name]', 1).text)
|
title = unicode(self.parser.select(self.document.getroot(), 'h1 span[property$=name]', 1).text)
|
||||||
main = self.parser.select(self.document.getroot(), 'div[typeof$=Recipe]', 1)
|
main = self.parser.select(self.document.getroot(), 'div[typeof$=Recipe]', 1)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue