new backend marmiton

This commit is contained in:
Julien Veyssier 2013-03-14 20:18:13 +01:00
commit 4798fdd411
8 changed files with 312 additions and 13 deletions

View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import MarmitonBackend
__all__ = ['MarmitonBackend']

View file

@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.recipe import ICapRecipe,Recipe
from weboob.tools.backend import BaseBackend
from .browser import MarmitonBrowser
from urllib import quote_plus
__all__ = ['MarmitonBackend']
class MarmitonBackend(BaseBackend, ICapRecipe):
    """Recipe backend for the Marmiton website.

    Every network access is delegated to the backend's browser
    (a MarmitonBrowser, see BROWSER).
    """
    # Backend metadata.
    NAME = 'marmiton'
    MAINTAINER = u'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'
    VERSION = '0.f'
    DESCRIPTION = 'Marmiton recipe website'
    LICENSE = 'AGPLv3+'
    # Browser class associated with this backend.
    BROWSER = MarmitonBrowser

    def create_default_browser(self):
        """Return the browser built by create_browser() without arguments."""
        return self.create_browser()

    def get_recipe(self, id):
        """Return the recipe the browser retrieves for the identifier ``id``."""
        return self.browser.get_recipe(id)

    def iter_recipes(self, pattern):
        """Return the browser's search results for ``pattern``.

        The pattern is UTF-8 encoded and URL-quoted (spaces become ``+``)
        before being handed to the browser.
        NOTE(review): assumes ``pattern`` is a unicode string -- confirm
        against callers.
        """
        return self.browser.iter_recipes(quote_plus(pattern.encode('utf-8')))

    def fill_recipe(self, recipe, fields):
        """Complete ``recipe`` with the details found on its own page.

        :param recipe: Recipe object to complete in place
        :param fields: names of the fields the caller wants filled
        :returns: the same ``recipe`` object

        The recipe page is fetched only when at least one of the fields it
        provides is requested.  ``thumbnail_url`` is deliberately not a
        trigger: the recipe page never supplies it, so fetching for it
        would only cost a request.
        """
        # Exactly the attributes copied below from the fetched recipe.
        detail_fields = (
            'picture_url',
            'instructions',
            'ingredients',
            'comments',
            'nb_person',
            'cooking_time',
            'preparation_time',
        )
        if any(name in fields for name in detail_fields):
            rec = self.get_recipe(recipe.id)
            recipe.picture_url = rec.picture_url
            recipe.instructions = rec.instructions
            recipe.ingredients = rec.ingredients
            recipe.comments = rec.comments
            recipe.nb_person = rec.nb_person
            recipe.cooking_time = rec.cooking_time
            recipe.preparation_time = rec.preparation_time
        return recipe

    # Maps an object class to the method able to fill its missing fields
    # (presumably consumed by the weboob core -- verify against BaseBackend).
    OBJECTS = {
        Recipe: fill_recipe,
    }

View file

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser
from .pages import RecipePage, ResultsPage
__all__ = ['MarmitonBrowser']
class MarmitonBrowser(BaseBrowser):
    """Browser for www.marmiton.org: recipe search and recipe retrieval."""
    DOMAIN = 'www.marmiton.org'
    PROTOCOL = 'http'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
    # URL pattern -> page class handling the matching documents.
    PAGES = {
        'http://www.marmiton.org/recettes/recherche.aspx.*': ResultsPage,
        'http://www.marmiton.org/recettes/recette_.*': RecipePage,
    }

    def iter_recipes(self, pattern):
        """Open the search page for ``pattern`` and return its recipes.

        ``pattern`` is inserted as-is into the query string.
        """
        search_url = 'http://www.marmiton.org/recettes/recherche.aspx?st=5&cli=1&aqt=%s' % (pattern)
        self.location(search_url)
        assert self.is_on_page(ResultsPage)
        results_page = self.page
        return results_page.iter_recipes()

    def get_recipe(self, id):
        """Open the page of the recipe ``id`` and return the parsed recipe."""
        recipe_url = 'http://www.marmiton.org/recettes/recette_%s.aspx' % id
        self.location(recipe_url)
        assert self.is_on_page(RecipePage)
        recipe_page = self.page
        return recipe_page.get_recipe(id)

103
modules/marmiton/pages.py Normal file
View file

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.recipe import Recipe
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage
__all__ = ['RecipePage', 'ResultsPage']
class ResultsPage(BasePage):
    """ Page which contains results as a list of recipes
    """

    def iter_recipes(self):
        """Yield one partially filled Recipe per search result.

        Only ``div.m_search_result`` blocks holding exactly two ``td`` cells
        are considered: the first cell may hold the thumbnail image, the
        second holds the title link and the short description.  The fields
        only available on the recipe page are marked NotLoaded.
        """
        for div in self.parser.select(self.document.getroot(), 'div.m_search_result'):
            tds = self.parser.select(div, 'td')
            if len(tds) != 2:
                continue

            thumbnail_url = NotAvailable
            imgs = self.parser.select(tds[0], 'img')
            if len(imgs) > 0:
                # ``attrib`` is a mapping, not a callable: read it with .get()
                thumbnail_url = unicode(imgs[0].attrib.get('src', ''))

            link = self.parser.select(tds[1], 'div.m_search_titre_recette a', 1)
            title = unicode(link.text)
            # The id is the link target without its fixed prefix and extension.
            id = link.attrib.get('href', '').replace('.aspx', '').replace('/recettes/recette_', '')
            # Collapse the multi-line description into a single line.
            short_description = unicode(' '.join(
                self.parser.select(tds[1], 'div.m_search_result_part4', 1).text.strip().split('\n')))

            recipe = Recipe(id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            recipe.instructions = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            yield recipe
class RecipePage(BasePage):
    """ Page which contains a recipe
    """

    def get_recipe(self, id):
        """Build and return the Recipe ``id`` from the current document.

        Title, times, ingredients and instructions are read from the page;
        the number of persons and the picture stay NotAvailable when the
        page does not show them.  ``thumbnail_url`` is left NotLoaded.
        """
        select = self.parser.select
        root = self.document.getroot()

        # Fields that the page may legitimately lack.
        nb_person = NotAvailable
        picture_url = NotAvailable

        title = unicode(select(root, 'h1.m_title', 1).text_content().strip())
        main = select(root, 'div.m_content_recette_main', 1)

        prep_node = select(main, 'p.m_content_recette_info span.preptime', 1)
        preparation_time = int(prep_node.text_content())
        cook_node = select(main, 'p.m_content_recette_info span.cooktime', 1)
        cooking_time = int(cook_node.text_content())

        # The ingredients header may look like "... (pour N personnes)".
        header = select(main, 'p.m_content_recette_ingredients span', 1).text_content()
        if '(pour' in header and ')' in header:
            after_pour = header.split('pour ')[-1]
            nb_person = int(after_pour.split('personnes)')[0].split()[0])

        # Ingredients are "- " separated; the first chunk precedes the
        # first separator and is dropped.
        ingredients_text = select(main, 'p.m_content_recette_ingredients', 1).text_content().strip()
        ingredients = ingredients_text.split('- ')[1:]

        # Strip every instruction line and terminate each with a newline.
        raw_instructions = select(main, 'div.m_content_recette_todo', 1).text_content().strip()
        instructions = u''.join('%s\n' % row.strip() for row in raw_instructions.split('\n'))

        illustrations = select(root, 'a.m_content_recette_illu img')
        if len(illustrations) > 0:
            picture_url = unicode(illustrations[0].attrib.get('src', ''))

        comments = []
        for row in select(root, 'div.m_commentaire_row'):
            note = select(row, 'div.m_commentaire_note span', 1).text.strip()
            user = select(row, 'div.m_commentaire_content span', 1).text.strip()
            content = select(row, 'div.m_commentaire_content p', 1).text.strip()
            comments.append(u'user: %s, note: %s, comment: %s' % (user, note, content))

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.thumbnail_url = NotLoaded
        return recipe

33
modules/marmiton/test.py Normal file
View file

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
class MarmitonTest(BackendTest):
    """Test of the 'marmiton' backend: search, then fetch each result."""
    BACKEND = 'marmiton'

    def test_recipe(self):
        """Every recipe found for 'fondue' must come back with its details."""
        for found in self.backend.iter_recipes('fondue'):
            detailed = self.backend.get_recipe(found.id)
            assert detailed.instructions
            assert detailed.ingredients
            assert detailed.title
            assert detailed.preparation_time

27
scripts/cookboob Executable file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: ft=python et softtabstop=4 cinoptions=4 shiftwidth=4 ts=4 ai
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.applications.cookboob import Cookboob
# Start the Cookboob application only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
Cookboob.run()

View file

@ -22,16 +22,16 @@ from __future__ import with_statement
import sys import sys
from weboob.capabilities.recipe import ICapRecipe from weboob.capabilities.recipe import ICapRecipe
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.application.repl import ReplApplication from weboob.tools.application.repl import ReplApplication
from weboob.tools.application.formatters.iformatter import IFormatter, PrettyFormatter from weboob.tools.application.formatters.iformatter import IFormatter, PrettyFormatter
from weboob.core import CallErrors
__all__ = ['Cookboob'] __all__ = ['Cookboob']
class RecipeInfoFormatter(IFormatter): class RecipeInfoFormatter(IFormatter):
MANDATORY_FIELDS = ('id', 'title', 'preparation_time', 'cooking_time', 'ingredients', 'instructions', 'nb_person') MANDATORY_FIELDS = ('id', 'title', 'preparation_time', 'cooking_time', 'ingredients', 'instructions', 'nb_person', 'comments')
def format_obj(self, obj, alias): def format_obj(self, obj, alias):
result = u'%s%s%s\n' % (self.BOLD, obj.title, self.NC) result = u'%s%s%s\n' % (self.BOLD, obj.title, self.NC)
@ -39,12 +39,14 @@ class RecipeInfoFormatter(IFormatter):
result += 'Preparation time: %s\n' % obj.preparation_time result += 'Preparation time: %s\n' % obj.preparation_time
result += 'Cooking time: %s\n' % obj.cooking_time result += 'Cooking time: %s\n' % obj.cooking_time
result += 'Amount of people: %s\n' % obj.nb_person result += 'Amount of people: %s\n' % obj.nb_person
result += '\n%Ingredients%s\n' % (self.BOLD, self.NC) result += '\n%sIngredients%s\n' % (self.BOLD, self.NC)
for i in obj.ingredients: for i in obj.ingredients:
result += ' * %s'%i result += ' * %s\n'%i
result += '\n\n%Instructions%s\n' % (self.BOLD, self.NC) result += '\n%sInstructions%s\n' % (self.BOLD, self.NC)
for i in obj.instructions: result += '%s\n'%obj.instructions
result += ' * %s'%i result += '\n%sComments%s\n' % (self.BOLD, self.NC)
for c in obj.comments:
result += ' * %s\n'%c
return result return result
@ -56,10 +58,10 @@ class RecipeListFormatter(PrettyFormatter):
def get_description(self, obj): def get_description(self, obj):
result = u'' result = u''
if obj.short_description != NotAvailable: if obj.preparation_time != NotAvailable and obj.preparation_time != NotLoaded:
result += 'description: %s '%obj.short_description
if obj.preparation_time != NotAvailable:
result += 'prep time: %smin'%obj.preparation_time result += 'prep time: %smin'%obj.preparation_time
if obj.short_description != NotAvailable:
result += 'description: %s\n'%obj.short_description
return result return result
@ -90,7 +92,7 @@ class Cookboob(ReplApplication):
""" """
recipe = self.get_object(id, 'get_recipe') recipe = self.get_object(id, 'get_recipe')
if not recipee: if not recipe:
print >>sys.stderr, 'Recipe not found: %s' % id print >>sys.stderr, 'Recipe not found: %s' % id
return 3 return 3

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .base import IBaseCap, CapBaseObject, StringField, IntField, UserError, Field from .base import IBaseCap, CapBaseObject, StringField, IntField, Field
__all__ = ['Recipe', 'ICapRecipe'] __all__ = ['Recipe', 'ICapRecipe']
@ -30,11 +30,14 @@ class Recipe(CapBaseObject):
""" """
title = StringField('Title of the recipe') title = StringField('Title of the recipe')
thumbnail_url = StringField('Direct url to recipe thumbnail') thumbnail_url = StringField('Direct url to recipe thumbnail')
picture_url = StringField('Direct url to recipe picture')
short_description = StringField('Short description of a recipe')
nb_person = IntField('The recipe was made for this amount of persons') nb_person = IntField('The recipe was made for this amount of persons')
preparation_time = IntField('Preparation time of the recipe in minutes') preparation_time = IntField('Preparation time of the recipe in minutes')
cooking_time = IntField('Cooking time of the recipe in minutes') cooking_time = IntField('Cooking time of the recipe in minutes')
ingredients = Field('Ingredient list necessary for the recipe',list) ingredients = Field('Ingredient list necessary for the recipe',list)
instructions = Field('Instruction step list of the recipe',list) instructions = StringField('Instruction step list of the recipe')
comments = Field('User comments about the recipe',list)
def __init__(self, id, title): def __init__(self, id, title):
CapBaseObject.__init__(self, id) CapBaseObject.__init__(self, id)