[750g] working, get_recipe OK
This commit is contained in:
parent
b724840ddb
commit
31a3cf589b
2 changed files with 46 additions and 24 deletions
|
|
@ -28,11 +28,11 @@ __all__ = ['SevenFiftyGramsBrowser']
|
||||||
class SevenFiftyGramsBrowser(BaseBrowser):
|
class SevenFiftyGramsBrowser(BaseBrowser):
|
||||||
DOMAIN = 'www.750g.com'
|
DOMAIN = 'www.750g.com'
|
||||||
PROTOCOL = 'http'
|
PROTOCOL = 'http'
|
||||||
ENCODING = 'utf-8'
|
ENCODING = 'windows-1252'
|
||||||
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://www.750g.com/recettes_.*.htm': ResultsPage,
|
'http://www.750g.com/recettes_.*.htm': ResultsPage,
|
||||||
'http://www.750g.com/.*r[0-9]*.htm': RecipePage,
|
'http://www.750g.com/fiche_de_cuisine_complete.htm\?recettes_id=[0-9]*': RecipePage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_recipes(self, pattern):
|
def iter_recipes(self, pattern):
|
||||||
|
|
@ -41,6 +41,6 @@ class SevenFiftyGramsBrowser(BaseBrowser):
|
||||||
return self.page.iter_recipes()
|
return self.page.iter_recipes()
|
||||||
|
|
||||||
def get_recipe(self, id):
|
def get_recipe(self, id):
|
||||||
self.location('http://www.750g.com/%s.htm' % id)
|
self.location('http://www.750g.com/fiche_de_cuisine_complete.htm?recettes_id=%s' % id)
|
||||||
assert self.is_on_page(RecipePage)
|
assert self.is_on_page(RecipePage)
|
||||||
return self.page.get_recipe(id)
|
return self.page.get_recipe(id)
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,8 @@ class ResultsPage(BasePage):
|
||||||
if len(links) > 0:
|
if len(links) > 0:
|
||||||
link = links[0]
|
link = links[0]
|
||||||
title = unicode(link.text)
|
title = unicode(link.text)
|
||||||
id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm'))
|
#id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm'))
|
||||||
|
id = unicode(self.parser.select(div,'div.carnet-add a',1).attrib.get('href','').split('=')[-1])
|
||||||
thumbnail_url = NotAvailable
|
thumbnail_url = NotAvailable
|
||||||
short_description = NotAvailable
|
short_description = NotAvailable
|
||||||
|
|
||||||
|
|
@ -76,28 +77,49 @@ class RecipePage(BasePage):
|
||||||
instructions = NotAvailable
|
instructions = NotAvailable
|
||||||
comments = []
|
comments = []
|
||||||
|
|
||||||
title = unicode(self.parser.select(self.document.getroot(),'h1.m_title',1).text_content().strip())
|
title = unicode(self.parser.select(self.document.getroot(),'head > title',1).text.split(' - ')[1])
|
||||||
main = self.parser.select(self.document.getroot(),'div.m_content_recette_main',1)
|
main = self.parser.select(self.document.getroot(),'div.recette_description',1)
|
||||||
preparation_time = int(self.parser.select(main,'p.m_content_recette_info span.preptime',1).text_content())
|
|
||||||
cooking_time = int(self.parser.select(main,'p.m_content_recette_info span.cooktime',1).text_content())
|
rec_infos = self.parser.select(self.document.getroot(),'div.recette_infos div.infos_column strong')
|
||||||
ing_header_line = self.parser.select(main,'p.m_content_recette_ingredients span',1).text_content()
|
for info_title in rec_infos:
|
||||||
if '(pour' in ing_header_line and ')' in ing_header_line:
|
if u'Temps de préparation' in unicode(info_title.text):
|
||||||
nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0])
|
if info_title.tail.strip() != '':
|
||||||
ingredients = self.parser.select(main,'p.m_content_recette_ingredients',1).text_content().strip().split('- ')
|
preparation_time = int(info_title.tail.split()[0])
|
||||||
ingredients=ingredients[1:]
|
if 'h' in info_title.tail:
|
||||||
rinstructions = self.parser.select(main,'div.m_content_recette_todo',1).text_content().strip()
|
preparation_time = 60*preparation_time
|
||||||
instructions = u''
|
if 'Temps de cuisson' in info_title.text:
|
||||||
for line in rinstructions.split('\n'):
|
if info_title.tail.strip() != '':
|
||||||
instructions += '%s\n'%line.strip()
|
cooking_time = int(info_title.tail.split()[0])
|
||||||
instructions = instructions.strip('\n')
|
if 'h' in info_title.tail:
|
||||||
imgillu = self.parser.select(self.document.getroot(),'a.m_content_recette_illu img')
|
cooking_time = 60*cooking_time
|
||||||
|
if 'Nombre de personnes' in info_title.text:
|
||||||
|
if info_title.tail.strip() != '':
|
||||||
|
nb_person = int(info_title.tail)
|
||||||
|
|
||||||
|
ingredients = []
|
||||||
|
p_ing = self.parser.select(main,'div.data.top.left > div.content p')
|
||||||
|
for ing in p_ing:
|
||||||
|
ingtxt = unicode(ing.text_content().strip())
|
||||||
|
if ingtxt != '':
|
||||||
|
ingredients.append(ingtxt)
|
||||||
|
|
||||||
|
lines_instr = self.parser.select(main,'div.data.top.right div.content li')
|
||||||
|
if len(lines_instr) > 0:
|
||||||
|
instructions = u''
|
||||||
|
for line in lines_instr:
|
||||||
|
inst = ' '.join(line.text_content().strip().split())
|
||||||
|
instructions += '%s\n'% inst
|
||||||
|
instructions = instructions.strip('\n')
|
||||||
|
|
||||||
|
imgillu = self.parser.select(self.document.getroot(),'div.resume_recette_illustree img.photo')
|
||||||
if len(imgillu) > 0:
|
if len(imgillu) > 0:
|
||||||
picture_url = unicode(imgillu[0].attrib.get('src',''))
|
picture_url = unicode(imgillu[0].attrib.get('src',''))
|
||||||
for divcom in self.parser.select(self.document.getroot(),'div.m_commentaire_row'):
|
|
||||||
note = self.parser.select(divcom,'div.m_commentaire_note span',1).text.strip()
|
for divcom in self.parser.select(self.document.getroot(),'div.comment-outer'):
|
||||||
user = self.parser.select(divcom,'div.m_commentaire_content span',1).text.strip()
|
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
|
||||||
content = self.parser.select(divcom,'div.m_commentaire_content p',1).text.strip()
|
if u'| Répondre' in comtxt:
|
||||||
comments.append(u'user: %s, note: %s, comment: %s'%(user,note,content))
|
comtxt = comtxt.strip('0123456789').replace(u' | Répondre','')
|
||||||
|
comments.append(comtxt)
|
||||||
|
|
||||||
recipe = Recipe(id,title)
|
recipe = Recipe(id,title)
|
||||||
recipe.preparation_time = preparation_time
|
recipe.preparation_time = preparation_time
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue