From 4aee273997a58b7bc3df4059e46a1890cc568bbb Mon Sep 17 00:00:00 2001
From: Julien Veyssier
Date: Sat, 6 Apr 2013 21:20:04 +0200
Subject: [PATCH] new recipe backend : supertoinette

---
 modules/supertoinette/__init__.py |  22 +++++
 modules/supertoinette/backend.py  |  79 +++++++++++++++
 modules/supertoinette/browser.py  |  59 +++++++++++
 modules/supertoinette/pages.py    | 156 ++++++++++++++++++++++++++++++
 modules/supertoinette/test.py     |  37 +++++++
 5 files changed, 353 insertions(+)
 create mode 100644 modules/supertoinette/__init__.py
 create mode 100644 modules/supertoinette/backend.py
 create mode 100644 modules/supertoinette/browser.py
 create mode 100644 modules/supertoinette/pages.py
 create mode 100644 modules/supertoinette/test.py

diff --git a/modules/supertoinette/__init__.py b/modules/supertoinette/__init__.py
new file mode 100644
index 00000000..8d17e964
--- /dev/null
+++ b/modules/supertoinette/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .backend import SupertoinetteBackend
+
+__all__ = ['SupertoinetteBackend']
diff --git a/modules/supertoinette/backend.py b/modules/supertoinette/backend.py
new file mode 100644
index 00000000..23e5d728
--- /dev/null
+++ b/modules/supertoinette/backend.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.recipe import ICapRecipe, Recipe
+from weboob.tools.backend import BaseBackend
+
+from .browser import SupertoinetteBrowser
+
+import unicodedata
+
+__all__ = ['SupertoinetteBackend']
+
+
+class SupertoinetteBackend(BaseBackend, ICapRecipe):
+    """Recipe backend for the Super Toinette website (supertoinette.com)."""
+
+    NAME = 'supertoinette'
+    MAINTAINER = u'Julien Veyssier'
+    EMAIL = 'julien.veyssier@aiur.fr'
+    VERSION = '0.g'
+    DESCRIPTION = u'Super Toinette, la cuisine familiale French recipe website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = SupertoinetteBrowser
+
+    def create_default_browser(self):
+        """Create the browser through create_browser(), without arguments."""
+        return self.create_browser()
+
+    def get_recipe(self, id):
+        """Return the recipe with the given id, or None if it is not found."""
+        return self.browser.get_recipe(id)
+
+    def iter_recipes(self, pattern):
+        """Iterate over the recipes matching pattern (sent UTF-8 encoded)."""
+        return self.browser.iter_recipes(pattern.encode('utf-8'))
+
+    def fill_recipe(self, recipe, fields):
+        """Complete recipe with the details found on its own page.
+
+        The page is fetched only when 'nb_person' or 'instructions' is among
+        the requested fields; every detail attribute is then copied at once.
+        The recipe is returned in all cases.
+        """
+        if 'nb_person' in fields or 'instructions' in fields:
+            rec = self.get_recipe(recipe.id)
+            # The browser gives None when the recipe page does not exist:
+            # leave the recipe as it is rather than fail on a None object.
+            if rec is None:
+                return recipe
+            recipe.picture_url = rec.picture_url
+            recipe.instructions = rec.instructions
+            recipe.ingredients = rec.ingredients
+            recipe.comments = rec.comments
+            recipe.author = rec.author
+            recipe.nb_person = rec.nb_person
+            recipe.cooking_time = rec.cooking_time
+            recipe.preparation_time = rec.preparation_time
+
+        return recipe
+
+    OBJECTS = {
+        Recipe: fill_recipe,
+    }
diff --git a/modules/supertoinette/browser.py b/modules/supertoinette/browser.py
new file mode 100644
index 00000000..cc383734
--- /dev/null
+++ b/modules/supertoinette/browser.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound
+
+from .pages import RecipePage, ResultsPage
+
+
+__all__ = ['SupertoinetteBrowser']
+
+
+class SupertoinetteBrowser(BaseBrowser):
+    """Browser for the search and recipe pages of supertoinette.com."""
+
+    DOMAIN = 'www.supertoinette.com'
+    PROTOCOL = 'http'
+    ENCODING = 'utf-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['desktop_firefox']
+    PAGES = {
+        'http://www.supertoinette.com/liste-recettes/.*': ResultsPage,
+        'http://www.supertoinette.com/recette/[0-9]*.*': RecipePage,
+    }
+
+    def iter_recipes(self, pattern):
+        """Open the result list for pattern and iterate over its recipes."""
+        self.location('http://www.supertoinette.com/liste-recettes/%s/' % (pattern))
+        assert self.is_on_page(ResultsPage)
+        return self.page.iter_recipes()
+
+    def get_recipe(self, id):
+        """Return the recipe with the given id.
+
+        None is returned when the site answers with a "not found" error or
+        when the page reached is not a recipe page.
+        """
+        try:
+            self.location('http://www.supertoinette.com/recette/%s/' % id)
+        except BrowserHTTPNotFound:
+            return None
+        if self.is_on_page(RecipePage):
+            return self.page.get_recipe(id)
+        return None
diff --git a/modules/supertoinette/pages.py b/modules/supertoinette/pages.py
new file mode 100644
index 00000000..ef8466a8
--- /dev/null
+++ b/modules/supertoinette/pages.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.recipe import Recipe, Comment
+from weboob.capabilities.base import NotAvailable, NotLoaded
+from weboob.tools.browser import BasePage
+
+import re
+import string
+
+__all__ = ['RecipePage', 'ResultsPage']
+
+
+def _duration_to_minutes(duration):
+    """Convert a duration such as 'PT1H30M' into a number of minutes.
+
+    Only the hour and minute fields of the time part are read; a field which
+    is missing or has no digits counts for 0.
+    """
+    time_part = duration.rpartition('T')[2]
+    minutes = 0
+    hours_match = re.search(r'(\d+)H', time_part)
+    if hours_match:
+        minutes += 60 * int(hours_match.group(1))
+    minutes_match = re.search(r'(\d+)M', time_part)
+    if minutes_match:
+        minutes += int(minutes_match.group(1))
+    return minutes
+
+
+def _parse_servings(text):
+    """Extract the serving counts from a label such as '4 personnes' or '4/6'.
+
+    Return a list with the first one or two numbers found, or NotAvailable
+    when the text is missing or holds no number.
+    """
+    numbers = [int(nb) for nb in re.findall(r'\d+', text or '')]
+    if not numbers:
+        return NotAvailable
+    return numbers[:2]
+
+
+class ResultsPage(BasePage):
+    """ Page which contains results as a list of recipes
+    """
+    def iter_recipes(self):
+        """Yield a partially filled Recipe for each result on the page."""
+        for div in self.parser.select(self.document.getroot(), 'div.result-recipe'):
+            thumbnail_url = NotAvailable
+            imgs = self.parser.select(div, 'a.pull-image-left img')
+            if len(imgs) > 0:
+                url = unicode(imgs[0].attrib.get('src', ''))
+                # Only URLs starting with 'http://' are kept as thumbnails.
+                if url.startswith('http://'):
+                    thumbnail_url = url
+
+            link = self.parser.select(div, 'div.result-text a', 1)
+            title = unicode(link.text)
+            # The id is the third '/'-separated piece of the link href.
+            id = unicode(link.attrib.get('href', '').split('/')[2])
+
+            txt = self.parser.select(div, 'div.result-text p', 1)
+            short_description = unicode(txt.text_content())
+
+            recipe = Recipe(id, title)
+            recipe.thumbnail_url = thumbnail_url
+            recipe.short_description = short_description
+            # These fields are left as NotLoaded in the result list.
+            recipe.instructions = NotLoaded
+            recipe.ingredients = NotLoaded
+            recipe.nb_person = NotLoaded
+            recipe.cooking_time = NotLoaded
+            recipe.preparation_time = NotLoaded
+            recipe.author = NotLoaded
+            yield recipe
+
+
+class RecipePage(BasePage):
+    """ Page which contains a recipe
+    """
+    def _get_duration(self, root, selector):
+        """Return the minutes held by the first element matching selector.
+
+        The duration is read from the 'content' attribute of the element;
+        NotAvailable is returned when nothing matches under root.
+        """
+        spans = self.parser.select(root, selector)
+        if len(spans) == 0:
+            return NotAvailable
+        return _duration_to_minutes(spans[0].attrib.get('content', ''))
+
+    def get_recipe(self, id):
+        """Build the Recipe described by this page, using the given id."""
+        root = self.document.getroot()
+        picture_url = NotAvailable
+        nb_person = NotAvailable
+        # The author and the comments are not read from the page.
+        author = NotAvailable
+        comments = []
+
+        title = unicode(self.parser.select(root, 'h1 span[property$=name]', 1).text)
+        main = self.parser.select(root, 'div[typeof$=Recipe]', 1)
+        imgillu = self.parser.select(main, 'div.image-with-credit img')
+        if len(imgillu) > 0:
+            picture_url = unicode(imgillu[0].attrib.get('src', ''))
+
+        # The preparation time is searched in the whole document and the
+        # cooking time only inside the recipe block.
+        preparation_time = self._get_duration(root, 'span.preptime[property$=prepTime]')
+        cooking_time = self._get_duration(main, 'span.cooktime[property$=cookTime]')
+
+        l_nbpers = self.parser.select(main, 'div.ingredients p.servings')
+        if len(l_nbpers) > 0:
+            nb_person = _parse_servings(l_nbpers[0].text)
+
+        ingredients = []
+        for ing in self.parser.select(main, 'div.ingredients ul.dotlist'):
+            for i in self.parser.select(ing, 'li'):
+                ingtxt = unicode(i.text_content().strip())
+                if ingtxt != '':
+                    # Collapse every run of whitespace into one space.
+                    ingredients.append(' '.join(ingtxt.split()))
+
+        l_divinst = self.parser.select(root, 'div#recipe-steps-list p.step-details')
+        instructions = u''.join(
+            '%s: %s\n' % (num_inst, inst.text_content())
+            for num_inst, inst in enumerate(l_divinst, 1))
+
+        recipe = Recipe(id, title)
+        recipe.preparation_time = preparation_time
+        recipe.cooking_time = cooking_time
+        recipe.nb_person = nb_person
+        recipe.ingredients = ingredients
+        recipe.instructions = instructions
+        recipe.picture_url = picture_url
+        recipe.comments = comments
+        recipe.author = author
+        recipe.thumbnail_url = NotLoaded
+        return recipe
diff --git a/modules/supertoinette/test.py b/modules/supertoinette/test.py
new file mode 100644
index 00000000..92d8fbbe
--- /dev/null
+++ b/modules/supertoinette/test.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.test import BackendTest
+
+
+class SupertoinetteTest(BackendTest):
+    """Test of the supertoinette backend."""
+
+    BACKEND = 'supertoinette'
+
+    def test_recipe(self):
+        """Search for 'fondue' and check every recipe found can be loaded."""
+        recipes = list(self.backend.iter_recipes('fondue'))
+        # An empty result list must not let the test pass vacuously.
+        assert len(recipes) > 0
+        for recipe in recipes:
+            full_recipe = self.backend.get_recipe(recipe.id)
+            assert full_recipe.instructions
+            assert full_recipe.ingredients
+            assert full_recipe.title