[cuisineaz] site changed / rewritten using browser 2

This commit is contained in:
Bezleputh 2015-08-06 19:34:07 +02:00
commit ab533ad1b7
4 changed files with 110 additions and 154 deletions

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.deprecated.browser import Browser, BrowserHTTPNotFound from weboob.browser import PagesBrowser, URL
from .pages import RecipePage, ResultsPage from .pages import RecipePage, ResultsPage
@ -26,26 +26,18 @@ from .pages import RecipePage, ResultsPage
__all__ = ['CuisineazBrowser'] __all__ = ['CuisineazBrowser']
class CuisineazBrowser(Browser): class CuisineazBrowser(PagesBrowser):
DOMAIN = 'www.cuisineaz.com'
PROTOCOL = 'http' BASEURL = 'http://www.cuisineaz.com'
ENCODING = 'utf-8'
USER_AGENT = Browser.USER_AGENTS['wget'] search = URL('recettes/recherche_v2.aspx\?recherche=(?P<pattern>.*)', ResultsPage)
PAGES = { recipe = URL('recettes/(?P<_id>.*).aspx', RecipePage)
'http://www.cuisineaz.com/recettes/recherche_v2.aspx\?recherche=.*': ResultsPage,
'http://www.cuisineaz.com/recettes/.*[0-9]*.aspx': RecipePage,
}
def iter_recipes(self, pattern): def iter_recipes(self, pattern):
self.location('http://www.cuisineaz.com/recettes/recherche_v2.aspx?recherche=%s' % ( return self.search.go(pattern=pattern.replace(' ', '-')).iter_recipes()
pattern.replace(' ', '-')))
assert self.is_on_page(ResultsPage)
return self.page.iter_recipes()
def get_recipe(self, id): def get_recipe(self, _id, obj=None):
try: return self.recipe.go(_id=_id).get_recipe(obj=obj)
self.location('http://www.cuisineaz.com/recettes/%s.aspx' % id)
except BrowserHTTPNotFound: def get_comments(self, _id):
return return self.recipe.go(_id=_id).get_comments()
if self.is_on_page(RecipePage):
return self.page.get_recipe(id)

View file

@ -50,15 +50,10 @@ class CuisineazModule(Module, CapRecipe):
def fill_recipe(self, recipe, fields): def fill_recipe(self, recipe, fields):
if 'nb_person' in fields or 'instructions' in fields: if 'nb_person' in fields or 'instructions' in fields:
rec = self.get_recipe(recipe.id) recipe = self.browser.get_recipe(recipe.id, recipe)
recipe.picture_url = rec.picture_url
recipe.instructions = rec.instructions if 'comments' in fields:
recipe.ingredients = rec.ingredients recipe.comments = list(self.browser.get_comments(recipe.id))
recipe.comments = rec.comments
recipe.author = rec.author
recipe.nb_person = rec.nb_person
recipe.cooking_time = rec.cooking_time
recipe.preparation_time = rec.preparation_time
return recipe return recipe

View file

@ -19,133 +19,100 @@
from weboob.capabilities.recipe import Recipe, Comment from weboob.capabilities.recipe import Recipe, Comment
from weboob.capabilities.base import NotAvailable, NotLoaded from weboob.capabilities.base import NotAvailable
from weboob.deprecated.browser import Page from weboob.browser.pages import HTMLPage, pagination
from weboob.browser.elements import ItemElement, method, ListElement
from weboob.browser.filters.standard import CleanText, Regexp, Env, Time
from weboob.browser.filters.html import XPath, CleanHTML
import re
import datetime
class ResultsPage(Page): class CuisineazDuration(Time):
klass = datetime.timedelta
_regexp = re.compile(r'((?P<hh>\d+) h)?((?P<mm>\d+) min)?(?P<ss>\d+)?')
kwargs = {'hours': 'hh', 'minutes': 'mm', 'seconds': 'ss'}
class ResultsPage(HTMLPage):
""" Page which contains results as a list of recipies """ Page which contains results as a list of recipies
""" """
def iter_recipes(self): @pagination
for div in self.parser.select(self.document.getroot(), 'div.rechRecette'): @method
thumbnail_url = NotAvailable class iter_recipes(ListElement):
short_description = NotAvailable item_xpath = '//div[@id="divRecette"]'
imgs = self.parser.select(div, 'img')
if len(imgs) > 0:
url = unicode(imgs[0].attrib.get('src', ''))
if url.startswith('http://'):
thumbnail_url = url
link = self.parser.select(div, 'a.rechRecetTitle', 1) def next_page(self):
title = unicode(link.text) next = CleanText('//li[@class="next"]/span/a/@href',
id = unicode(link.attrib.get('href', '').split( default=None)(self)
'/')[-1].replace('.aspx', '')) if next:
return next
short_description = u'' class item(ItemElement):
ldivprix = self.parser.select(div, 'div.prix') klass = Recipe
if len(ldivprix) > 0:
divprix = ldivprix[0]
nbprixneg = 0
spanprix = self.parser.select(divprix, 'span')
if len(spanprix) > 0:
nbprixneg = unicode(spanprix[0].text).count(u'')
nbprixtot = unicode(divprix.text_content()).count(u'')
short_description += u'Cost: %s/%s ; ' % (
nbprixtot - nbprixneg, nbprixtot)
short_description += unicode(' '.join(self.parser.select( def condition(self):
div, 'div.rechResume', 1).text_content().split()).strip()).replace(u'', '') return Regexp(CleanText('./div[has-class("searchTitle")]/h2/a/@href'),
short_description += u' ' 'http://www.cuisineaz.com/recettes/(.*).aspx',
short_description += unicode(' '.join(self.parser.select( default=None)(self.el)
div, 'div.rechIngredients', 1).text_content().split()).strip())
recipe = Recipe(id, title) obj_id = Regexp(CleanText('./div[has-class("searchTitle")]/h2/a/@href'),
recipe.thumbnail_url = thumbnail_url 'http://www.cuisineaz.com/recettes/(.*).aspx')
recipe.short_description = short_description obj_title = CleanText('./div[has-class("searchTitle")]/h2/a')
recipe.instructions = NotLoaded
recipe.ingredients = NotLoaded obj_thumbnail_url = CleanText('./div[has-class("searchImg")]/span/img[@data-src!=""]/@data-src|./div[has-class("searchImg")]/div/span/img[@src!=""]/@src',
recipe.nb_person = NotLoaded default=None)
recipe.cooking_time = NotLoaded
recipe.preparation_time = NotLoaded obj_short_description = CleanText('./div[has-class("searchIngredients")]')
recipe.author = NotLoaded
yield recipe
class RecipePage(Page): class RecipePage(HTMLPage):
""" Page which contains a recipe """ Page which contains a recipe
""" """
@method
class get_recipe(ItemElement):
klass = Recipe
def get_recipe(self, id): obj_id = Env('_id')
title = NotAvailable obj_title = CleanText('//div[@id="ficheRecette"]/h1')
preparation_time = NotAvailable
cooking_time = NotAvailable
author = NotAvailable
nb_person = NotAvailable
ingredients = NotAvailable
picture_url = NotAvailable
instructions = NotAvailable
comments = NotAvailable
title = unicode(self.parser.select( obj_picture_url = CleanText('//img[@id="shareimg" and @src!=""]/@src', default=None)
self.document.getroot(), 'div#ficheRecette h1.fn.recetteH1', 1).text)
main = self.parser.select(
self.document.getroot(), 'div#ficheRecette', 1)
imgillu = self.parser.select(main, 'div#recetteLeft img.photo')
if len(imgillu) > 0:
picture_url = unicode(imgillu[0].attrib.get('src', ''))
l_spanprep = self.parser.select(main, 'span.preptime') obj_thumbnail_url = CleanText('//img[@id="shareimg" and @src!=""]/@src', default=None)
if len(l_spanprep) > 0:
preparation_time = int(self.parser.tocleanstring(l_spanprep[0]).split()[0])
l_cooktime = self.parser.select(main, 'span.cooktime')
if len(l_cooktime) > 0:
cooking_time = int(self.parser.tocleanstring(l_cooktime[0]).split()[0])
l_nbpers = self.parser.select(main, 'td#recipeQuantity span')
if len(l_nbpers) > 0:
rawnb = l_nbpers[0].text.split()[0]
if '/' in rawnb:
nbs = rawnb.split('/')
nb_person = [int(nbs[0]), int(nbs[1])]
else:
nb_person = [int(rawnb)]
ingredients = [] def obj_preparation_time(self):
l_ing = self.parser.select(main, 'div#ingredients li.ingredient') _prep = CuisineazDuration(CleanText('//span[@id="ctl00_ContentPlaceHolder_LblRecetteTempsPrepa"]'))(self)
for ing in l_ing: return int(_prep.total_seconds() / 60)
ingtxt = unicode(ing.text_content().strip())
if ingtxt != '':
ingredients.append(ingtxt)
instructions = u'' def obj_cooking_time(self):
l_divinst = self.parser.select( _cook = CuisineazDuration(CleanText('//span[@id="ctl00_ContentPlaceHolder_LblRecetteTempsCuisson"]'))(self)
main, 'div#preparation span.instructions div') return int(_cook.total_seconds() / 60)
for inst in l_divinst:
instructions += '%s: ' % inst.text
instructions += '%s\n' % inst.getnext().text
divcoms = self.parser.select(self.document.getroot(), 'div.comment') def obj_nb_person(self):
if len(divcoms) > 0: nb_pers = CleanText('//span[@id="ctl00_ContentPlaceHolder_LblRecetteNombre"]')(self)
comments = [] return [nb_pers] if nb_pers else NotAvailable
for divcom in divcoms:
author = unicode(self.parser.select(
divcom, 'div.commentAuthor span', 1).text)
comtxt = unicode(self.parser.select(
divcom, 'p', 1).text_content().strip())
comments.append(Comment(author=author, text=comtxt))
spans_author = self.parser.select(self.document.getroot(), 'span.author') def obj_ingredients(self):
if len(spans_author) > 0: ingredients = []
author = unicode(spans_author[0].text_content().strip()) for el in XPath('//div[@id="ingredients"]/ul/li')(self):
ingredients.append(CleanText('.')(el))
return ingredients
recipe = Recipe(id, title) obj_instructions = CleanHTML('//div[@id="preparation"]/span[@class="instructions"]')
recipe.preparation_time = preparation_time
recipe.cooking_time = cooking_time @method
recipe.nb_person = nb_person class get_comments(ListElement):
recipe.ingredients = ingredients item_xpath = '//div[@class="comment pb15 row"]'
recipe.instructions = instructions
recipe.picture_url = picture_url class item(ItemElement):
recipe.comments = comments klass = Comment
recipe.author = author
recipe.thumbnail_url = NotLoaded obj_author = CleanText('./div[has-class("comment-left")]/div/div/div[@class="fs18 txtcaz mb5 first-letter"]')
return recipe
obj_text = CleanText('./div[has-class("comment-right")]/div/p')
obj_id = CleanText('./@id')
def obj_rate(self):
return len(XPath('./div[has-class("comment-right")]/div/div/div/span/span[@class="icon icon-star"]')(self))

View file

@ -18,13 +18,15 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
import itertools
class CuisineazTest(BackendTest): class CuisineazTest(BackendTest):
MODULE = 'cuisineaz' MODULE = 'cuisineaz'
def test_recipe(self): def test_recipe(self):
recipes = self.backend.iter_recipes('fondue') recipes = list(itertools.islice(self.backend.iter_recipes(u'purée'), 0, 20))
assert len(recipes)
for recipe in recipes: for recipe in recipes:
full_recipe = self.backend.get_recipe(recipe.id) full_recipe = self.backend.get_recipe(recipe.id)
assert full_recipe.instructions assert full_recipe.instructions