From b724840ddb69444b20d737c84f664073dae916a2 Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Fri, 15 Mar 2013 19:27:41 +0100 Subject: [PATCH] new backend 750g for cookboob, get_recipe NYI --- modules/750g/__init__.py | 22 ++++++++ modules/750g/backend.py | 63 ++++++++++++++++++++++ modules/750g/browser.py | 46 ++++++++++++++++ modules/750g/favicon.png | Bin 0 -> 1429 bytes modules/750g/pages.py | 111 ++++++++++++++++++++++++++++++++++++++ modules/750g/test.py | 33 ++++++++++++ modules/marmiton/pages.py | 2 +- 7 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 modules/750g/__init__.py create mode 100644 modules/750g/backend.py create mode 100644 modules/750g/browser.py create mode 100644 modules/750g/favicon.png create mode 100644 modules/750g/pages.py create mode 100644 modules/750g/test.py diff --git a/modules/750g/__init__.py b/modules/750g/__init__.py new file mode 100644 index 00000000..da6bec56 --- /dev/null +++ b/modules/750g/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Julien Veyssier +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
class SevenFiftyGramsBackend(BaseBackend, ICapRecipe):
    """Weboob backend giving access to recipes of the 750g website.

    Every request is delegated to the browser class named in ``BROWSER``.
    """

    NAME = '750g'
    MAINTAINER = u'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'
    VERSION = '0.f'
    DESCRIPTION = '750g recipe website'
    LICENSE = 'AGPLv3+'
    BROWSER = SevenFiftyGramsBrowser

    # Attributes that fill_recipe() copies from the fully loaded recipe,
    # in the order they are assigned.
    _DETAIL_FIELDS = (
        'picture_url',
        'instructions',
        'ingredients',
        'comments',
        'nb_person',
        'cooking_time',
        'preparation_time',
    )

    def create_default_browser(self):
        """Return a browser built by ``create_browser()`` with no arguments."""
        return self.create_browser()

    def get_recipe(self, id):
        """Return the recipe identified by *id*, as built by the browser."""
        return self.browser.get_recipe(id)

    def iter_recipes(self, pattern):
        """Return the browser's search results for *pattern*.

        *pattern* is encoded to UTF-8 before being handed to the browser.
        """
        return self.browser.iter_recipes(pattern.encode('utf-8'))

    def fill_recipe(self, recipe, fields):
        """Complete *recipe* in place with the data of its full page.

        The full recipe is fetched when *fields* names any attribute this
        method is able to fill.  Previously only 'thumbnail_url' and
        'instructions' triggered the fetch, so asking for e.g. 'ingredients'
        alone returned the object untouched.

        :param recipe: recipe object to complete; its ``id`` is used for
                       the lookup
        :param fields: iterable of attribute names the caller wants loaded
        :return: the same *recipe* object
        """
        wanted = set(fields)
        # 'thumbnail_url' stays a trigger for backward compatibility, even
        # though the attribute itself is not copied below.
        if 'thumbnail_url' in wanted or wanted.intersection(self._DETAIL_FIELDS):
            rec = self.get_recipe(recipe.id)
            for name in self._DETAIL_FIELDS:
                setattr(recipe, name, getattr(rec, name))

        return recipe

    # NOTE(review): presumably read by BaseBackend to pick the fill function
    # for each object type -- confirm against weboob.tools.backend.
    OBJECTS = {
        Recipe: fill_recipe,
    }
class SevenFiftyGramsBrowser(BaseBrowser):
    """Browser for www.750g.com: recipe search and recipe pages."""

    DOMAIN = 'www.750g.com'
    PROTOCOL = 'http'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
    # URL pattern -> page class.
    # NOTE(review): presumably BaseBrowser matches the current URL against
    # these regular expressions to choose the page class -- confirm.
    #
    # Dots are escaped so they only match a literal '.', and a recipe URL
    # must carry at least one digit after the 'r'.  With the previous
    # 'r[0-9]*.htm' a search URL such as 'recettes_beur.htm' matched the
    # recipe pattern as well as the results pattern.
    PAGES = {
        r'http://www\.750g\.com/recettes_.*\.htm': ResultsPage,
        r'http://www\.750g\.com/.*r[0-9]+\.htm': RecipePage,
    }

    def iter_recipes(self, pattern):
        """Open the search page for *pattern* and return its recipes.

        Spaces in *pattern* are replaced by underscores to build the URL.
        An AssertionError is raised if the resulting page is not a
        ResultsPage.
        """
        search_term = pattern.replace(' ', '_')
        self.location('http://www.750g.com/recettes_%s.htm' % search_term)
        assert self.is_on_page(ResultsPage)
        return self.page.iter_recipes()

    def get_recipe(self, id):
        """Open the page of recipe *id* and return the parsed recipe.

        *id* is the page name without its '.htm' suffix.  An AssertionError
        is raised if the resulting page is not a RecipePage.
        """
        self.location('http://www.750g.com/%s.htm' % id)
        assert self.is_on_page(RecipePage)
        return self.page.get_recipe(id)
z5jqJxCSXkf00k9EL_t(|+U=T4h#W;2$A4AbJx`XXf#`bCD61xF)D^)H(Fm(26Ca1* z1Com%g2KA$!3W6tz!gDB*o#OI4R~-7Tr~%iOE3z#7-C>uA)v^Tgy2zTXM3i*%7ZnN z+Q~e29{re&4FlCZ)m`8JU*GrtzN&7ai$%?4e7M67K$J?CT+_wk`X!(l;C6jFks;R` z_-^l!aT$^X__f?ClQ1YvhwKHkE*8ot zpVvqjW-n+-Kr`U&W`0;nXd42Jv)!s4M<_%!hFt5wjh-+}@!!r?+06qQ-Mmv+3`KR> z&1$fn@I^^LKl(DzwLmLRh*AWj1m9C3WT(QkW`cG$_7wu%03Jk0^t;xNycCXG_@Wq3 zMrBmL;Sg6ii9zlO4#H(y0Z*(D>em~(naQ}p51fF0j7PnG^9tF^vowsLMlYC6$x<=A=47~No};sG=NW% z1eq4_y)30uN5G)cU7YV8YrYuPbq@-&Lg&N^5>os%4)WmX}vOcRmQ%SuV4oRN~4~i7DSF z(9z9N&!5X53B!(kb~+AH23=B z6M`f8JXZ_BZOh9nXpIM)FBI4)#3lD0tX6rpQUTxt&*MJFVaLh}0Goy2k3u0`9?*Eq zR;zXJHwwYge4b50@M{>dU|e{Y<1__d)|xjA_%R`P)$_3Z>uf^^oD4%|gMedU$XTt~ z0Nf%adt8?nJTLw%a4HO$G5|(^?{Yad8K9qAF8zHNvb|hpzQN7s!;lN6y~lMCLf~8f z+xcatQsJJJ6+R0B{?wWZ@HcQ=sa0deMnFYt_W3@wDE}mzWsCgpv4PfnX&F@kPAJ7= zzR$PT^}|+sySdk%^=l28^FhFC)hY@Ymy+M}dET^U{2@zEVk6+aYL(N**xio9Q|`s6 za)n^2P~coX&o|jDcgu^zy<-XENu}!Tn@pcY>)N;h_MvspY$n6aQgX}^`E|&pVumGNGGTSGt{~s~HPFdIPl9GS4uJ_9WJEV-4 zfaz+LvT48Wd0f}Q0GpmGEHSi~R9Qf4&DSO@t`UM=hJcLKcTp*RQi@ve-0wJKn+R-V z1S~1VL1W}b;H3ujT~vxgLBL+u<$57l(wbL{P};Lv1G!|p;1{L%F$_6l&SyN2ya6`h zILsNBA2%to?*6j3XzrPSPlKS&{&Sv3p@IFkDi!9!koTJuL1RMjt`#oNxGpoM-@O*F zDc5D6MeP1ehEk=%@i1gm2>#N#o-H1B-FON3%2Mv9n+(3&b@9!NUeKBxFeW9BI}Vec z$B0GP+nz^BYrY5q6mX}MOnM$qG(2}yN)G38br0KW^kl0LZ0pejhGsEzMk!7!#e. 
class ResultsPage(BasePage):
    """Page which contains results as a list of recipes."""

    def iter_recipes(self):
        """Yield one partially filled Recipe per entry of the result list.

        Each recipe carries its id, title, thumbnail URL and a short
        description; the detail attributes are left as NotLoaded.  Entries
        without a title link are skipped.
        """
        root = self.document.getroot()
        for div in self.parser.select(root, 'div.recette_description > div.data'):
            links = self.parser.select(div, 'div.info > p.title > a.fn')
            if not links:
                continue
            link = links[0]
            title = unicode(link.text)
            # The id is the page name WITHOUT '.htm': the browser's
            # get_recipe() appends the suffix again when building the URL.
            # The previous replace('.htm', 'htm') only removed the dot and
            # produced ids such as 'foohtm'.
            href = link.attrib.get('href', '')
            id = unicode(href.strip('/').replace('.htm', ''))

            thumbnail_url = NotAvailable
            imgs = self.parser.select(div, 'img.recipe-image')
            if imgs:
                thumbnail_url = unicode(imgs[0].attrib.get('src', ''))

            # Collapse every run of whitespace of the info column into a
            # single space.
            infos = self.parser.select(div, 'div.infos_column', 1)
            short_description = unicode(' '.join(infos.text_content().split()).strip())

            # The images of the info column are counted and those whose
            # source ends with 'euro_on.png' give the numerator of the
            # "<on>/<total>" suffix appended to the description.
            imgs_cost = self.parser.select(div, 'div.infos_column img')
            cost_tot = len(imgs_cost)
            cost_on = sum(1 for img in imgs_cost
                          if img.attrib.get('src', '').endswith('euro_on.png'))
            short_description += u' %s/%s' % (cost_on, cost_tot)

            recipe = Recipe(id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            recipe.instructions = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            yield recipe


class RecipePage(BasePage):
    """Page which contains a recipe."""

    def get_recipe(self, id):
        """Parse the current document and return it as a Recipe with id *id*.

        ``thumbnail_url`` is left as NotLoaded.  ``nb_person`` and
        ``picture_url`` stay NotAvailable when the page does not provide
        them.

        NOTE(review): the commit subject says get_recipe is "NYI" for this
        backend, so these selectors may not match the 750g site yet --
        confirm before relying on this method.
        """
        root = self.document.getroot()
        nb_person = NotAvailable
        picture_url = NotAvailable
        comments = []

        title = unicode(self.parser.select(root, 'h1.m_title', 1).text_content().strip())
        main = self.parser.select(root, 'div.m_content_recette_main', 1)
        preparation_time = int(self.parser.select(main, 'p.m_content_recette_info span.preptime', 1).text_content())
        cooking_time = int(self.parser.select(main, 'p.m_content_recette_info span.cooktime', 1).text_content())

        # The ingredients header is parsed as "... (pour N personnes)";
        # N is the first word found after "pour ".
        ing_header_line = self.parser.select(main, 'p.m_content_recette_ingredients span', 1).text_content()
        if '(pour' in ing_header_line and ')' in ing_header_line:
            nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0])

        # Ingredients are split on '- '; the chunk before the first
        # separator is discarded.
        ingredients_text = self.parser.select(main, 'p.m_content_recette_ingredients', 1).text_content().strip()
        ingredients = ingredients_text.split('- ')[1:]

        # Strip every line, then drop leading and trailing newlines.
        rinstructions = self.parser.select(main, 'div.m_content_recette_todo', 1).text_content().strip()
        instructions = u'\n'.join(line.strip() for line in rinstructions.split('\n')).strip('\n')

        imgillu = self.parser.select(root, 'a.m_content_recette_illu img')
        if imgillu:
            picture_url = unicode(imgillu[0].attrib.get('src', ''))

        for divcom in self.parser.select(root, 'div.m_commentaire_row'):
            note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
            user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
            content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
            comments.append(u'user: %s, note: %s, comment: %s' % (user, note, content))

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.thumbnail_url = NotLoaded
        return recipe
class SevenFiftyGramsTest(BackendTest):
    """Test of the 750g backend: search recipes, then load each result."""

    BACKEND = '750g'

    def test_recipe(self):
        """Search for 'fondue' and check every result loads completely."""
        recipes = list(self.backend.iter_recipes('fondue'))
        # Without this check an empty search result skips the loop below
        # and the test passes without verifying anything.
        assert recipes
        for recipe in recipes:
            full_recipe = self.backend.get_recipe(recipe.id)
            assert full_recipe.instructions
            assert full_recipe.ingredients
            assert full_recipe.title
            assert full_recipe.preparation_time