Make Collection more safe and sane

* Remove callbacks in Collection object
  Make Collection a "dumb" object (and also a base object,
  though it isn't very useful for now)
* Rename Path to WorkingPath, because it is more about managing state
  than being a single path.
* Rewrite almost all of WorkingPath, because the code was overly
  complicated for no reason (I tried some special cases and it turned
  out that fromstring didn't handle them, and that the
  quote-escape-unquote was just unnecessary). I also rewrote it to be
  more Pythonic (no more lambdas and maps) and added tests.
* Require the full split path when creating a Collection. Because, come to
  think of it, an object needs a unique identifier; in the case of
  Collections, it is the full path, not only its last part.
  I might even replace the id by the full split path in the future.
* There is now only one way to get items of a Collection: calling
  iter_resources().
* Rewrite flatten_resources to iter_resources_flat(), which just calls
  iter_resources() recursively.
* Rewrite the collection part of the canalplus module. There are no more
  callbacks, and no more pages calling the browser to check another page!
  The logic is only in iter_resources().
  The resulting code is not very pretty, but it should get better.
  As a bonus, avoid reloading the main XML file when we already have it
  open.
* change_path() now expects a split path and not a string.
* up/home special cases for "cd" are handled in the same place, and
  store the previous place properly (but are not yet usable from
  a user command).

This is a big commit but it would be hard to split it into *working*
commits.

If you read this entire commit message, I will buy you a beer.

refs #774
fixes #773
This commit is contained in:
Laurent Bachelier 2012-03-08 00:33:49 +01:00
commit b4b7182960
13 changed files with 147 additions and 148 deletions

View file

@ -28,7 +28,7 @@ from weboob.tools.browser.decorators import id2url
from .pages import InitPage, VideoPage from .pages import InitPage, VideoPage
from .video import CanalplusVideo from .video import CanalplusVideo
from weboob.capabilities.collection import Collection, CollectionNotFound from weboob.capabilities.collection import CollectionNotFound
__all__ = ['CanalplusBrowser'] __all__ = ['CanalplusBrowser']
@ -60,11 +60,8 @@ class CanalplusBrowser(BaseBrowser):
} }
def __init__(self, quality, *args, **kwargs): def __init__(self, quality, *args, **kwargs):
BaseBrowser.__init__(self, parser= self.PARSER, *args, **kwargs) BaseBrowser.__init__(self, parser=self.PARSER, *args, **kwargs)
if quality in self.FORMATS: self.quality = self.FORMATS.get(quality, self.FORMATS['hd'])
self.quality = self.FORMATS[quality]
else:
self.quality = 'HD'
def home(self): def home(self):
self.location('http://service.canal-plus.com/video/rest/initPlayer/cplus/') self.location('http://service.canal-plus.com/video/rest/initPlayer/cplus/')
@ -79,19 +76,33 @@ class CanalplusBrowser(BaseBrowser):
return self.page.get_video(video, self.quality) return self.page.get_video(video, self.quality)
def iter_resources(self, split_path): def iter_resources(self, split_path):
if not self.is_on_page(InitPage):
self.home() self.home()
collections = self.page.collections channels = self.page.get_channels()
def walk_res(path, collections): if len(split_path) == 0:
if len(path) == 0 or not isinstance(collections, (list, Collection)): for channel in channels:
return collections if len(channel.split_path) == 1:
i = path[0] yield channel
matches = [collection elif len(split_path) == 1:
for collection in collections for channel in channels:
if collection.id == i or collection.title == i] if len(channel.split_path) == 2 and split_path[0] == channel.split_path[0]:
if not len(matches): yield channel
raise CollectionNotFound(path) elif len(split_path) == 2:
subchannels = self.iter_resources(split_path[0:1])
channel = None
for subchannel in subchannels:
# allow matching by title for backward compatibility (for now)
if split_path[0] == subchannel.split_path[0] and \
split_path[1] in (subchannel.split_path[1], subchannel.title):
channel = subchannel
if channel:
self.location("http://service.canal-plus.com/video/rest/getMEAs/cplus/%s" % channel.id)
assert self.is_on_page(VideoPage)
for video in self.page.iter_channel():
yield video
else:
raise CollectionNotFound(split_path)
return walk_res(path[1:], matches[0]) else:
raise CollectionNotFound(split_path)
return walk_res(split_path, collections)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Nicolas Duhamel # Copyright(C) 2010-2012 Nicolas Duhamel, Laurent Bachelier
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -26,24 +26,19 @@ __all__ = ['InitPage']
class InitPage(BasePage): class InitPage(BasePage):
def on_loaded(self): def get_channels(self):
self.collections = [] """
Extract all possible channels (paths) from the page
def do(_id): """
self.browser.location("http://service.canal-plus.com/video/rest/getMEAs/cplus/%s" % _id) channels = list()
return self.browser.page.iter_channel()
# Parse the list of channels
for elem in self.document[2].getchildren(): for elem in self.document[2].getchildren():
children = []
for e in elem.getchildren(): for e in elem.getchildren():
if e.tag == "NOM": if e.tag == "NOM":
_id = e.text.strip() name = e.text.strip()
channels.append(Collection([name]))
elif e.tag == "SELECTIONS": elif e.tag == "SELECTIONS":
for select in e: for select in e:
sub = Collection(_id=select[0].text, sub = Collection([name, select[0].text],
title=select[1].text.strip(), title=select[1].text.strip())
fct=do) channels.append(sub)
children.append(sub) return channels
coll = Collection(_id, children=children)
self.collections.append(coll)

View file

@ -104,7 +104,7 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo):
def iter_resources(self, objs, split_path): def iter_resources(self, objs, split_path):
if Radio in objs: if Radio in objs:
if len(split_path) == 1 and split_path[0] == 'francebleu': if split_path == [u'francebleu']:
for _id in sorted(self._RADIOS.iterkeys()): for _id in sorted(self._RADIOS.iterkeys()):
if _id.startswith('fb'): if _id.startswith('fb'):
yield self.get_radio(_id) yield self.get_radio(_id)
@ -112,13 +112,12 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo):
for _id in sorted(self._RADIOS.iterkeys()): for _id in sorted(self._RADIOS.iterkeys()):
if not _id.startswith('fb'): if not _id.startswith('fb'):
yield self.get_radio(_id) yield self.get_radio(_id)
yield Collection('francebleu', 'France Bleu', yield Collection(['francebleu'], 'France Bleu')
children=self.iter_resources(objs, ['francebleu']))
else: else:
raise CollectionNotFound(split_path) raise CollectionNotFound(split_path)
def iter_radios_search(self, pattern): def iter_radios_search(self, pattern):
for radio in self._flatten_resources(self.iter_resources((Radio, ), [])): for radio in self.iter_resources_flat((Radio, ), []):
if pattern.lower() in radio.title.lower() or pattern.lower() in radio.description.lower(): if pattern.lower() in radio.title.lower() or pattern.lower() in radio.description.lower():
yield radio yield radio

View file

@ -97,7 +97,7 @@ class RedmineBackend(BaseBackend, ICapContent, ICapBugTracker, ICapCollection):
def iter_resources(self, objs, split_path): def iter_resources(self, objs, split_path):
if Project in objs or Issue in objs: if Project in objs or Issue in objs:
if len(split_path) == 0: if len(split_path) == 0:
return [Collection(project.id, project.name, fct=self.iter_issues) return [Collection([project.id], project.name)
for project in self.iter_projects()] for project in self.iter_projects()]
if len(split_path) == 1: if len(split_path) == 1:

View file

@ -136,7 +136,7 @@ class BoobTracker(ReplApplication):
query.category = self.options.category query.category = self.options.category
query.status = self.options.status query.status = self.options.status
self.change_path('/%s/search' % query.project) self.change_path([query.project, u'search'])
for backend, issue in self.do('iter_issues', query, backends=backends): for backend, issue in self.do('iter_issues', query, backends=backends):
self.add_object(issue) self.add_object(issue)
self.format(issue) self.format(issue)

View file

@ -147,7 +147,7 @@ class Flatboob(ReplApplication):
query.cost_max = self.ask_int('Enter max cost') query.cost_max = self.ask_int('Enter max cost')
query.nb_rooms = self.ask_int('Enter number of rooms') query.nb_rooms = self.ask_int('Enter number of rooms')
self.change_path('/housings') self.change_path([u'housings'])
for backend, housing in self.do('search_housings', query): for backend, housing in self.do('search_housings', query):
self.add_object(housing) self.add_object(housing)
self.format(housing) self.format(housing)

View file

@ -135,7 +135,7 @@ class Radioob(ReplApplication):
If PATTERN is not given, this command will list all the radios. If PATTERN is not given, this command will list all the radios.
""" """
self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'All radios') self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'All radios')
self.change_path('/search') self.change_path([u'search'])
for backend, radio in self.do('iter_radios_search', pattern=pattern): for backend, radio in self.do('iter_radios_search', pattern=pattern):
self.add_object(radio) self.add_object(radio)
self.format(radio) self.format(radio)

View file

@ -223,7 +223,7 @@ class Videoob(ReplApplication):
return 1 return 1
self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos') self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos')
self.change_path('/search') self.change_path([u'search'])
for backend, video in self.do('search_videos', pattern=pattern, nsfw=self.nsfw, for backend, video in self.do('search_videos', pattern=pattern, nsfw=self.nsfw,
max_results=self.options.count): max_results=self.options.count):
self.add_object(video) self.add_object(video)

View file

@ -175,7 +175,7 @@ class Weboorrents(ReplApplication):
Search torrents. Search torrents.
""" """
self.change_path('/search') self.change_path([u'search'])
if not pattern: if not pattern:
pattern = None pattern = None
self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest torrents') self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest torrents')

View file

@ -96,7 +96,7 @@ class WetBoobs(ReplApplication):
Search cities. Search cities.
""" """
self.change_path('/cities') self.change_path(['cities'])
for backend, city in self.do('iter_city_search', pattern, caps=ICapWeather): for backend, city in self.do('iter_city_search', pattern, caps=ICapWeather):
self.add_object(city) self.add_object(city)
self.format(city) self.format(city)
@ -143,7 +143,7 @@ class WetBoobs(ReplApplication):
List all rivers. If PATTERN is specified, search on a pattern. List all rivers. If PATTERN is specified, search on a pattern.
""" """
self.change_path('/gauges') self.change_path([u'gauges'])
for backend, gauge in self.do('iter_gauges', pattern or None, caps=ICapWaterLevel): for backend, gauge in self.do('iter_gauges', pattern or None, caps=ICapWaterLevel):
self.add_object(gauge) self.add_object(gauge)
self.format(gauge) self.format(gauge)

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .base import IBaseCap from .base import IBaseCap, CapBaseObject
__all__ = ['ICapCollection', 'Collection', 'CollectionNotFound'] __all__ = ['ICapCollection', 'Collection', 'CollectionNotFound']
@ -31,42 +31,18 @@ class CollectionNotFound(Exception):
Exception.__init__(self, msg) Exception.__init__(self, msg)
class Children(object): class Collection(CapBaseObject):
""" """
Dynamic property of a Collection. A Collection is a "fake" object returned in results, which shows you can get
Returns a list, either by calling a function or because more results if you go into its path.
it already has the list.
"""
def __get__(self, obj, type=None):
if obj._children is None:
if callable(obj._fct):
obj._children = obj._fct(obj.id)
return obj._children or []
It is a dumb object, it must not contain callbacks to a backend.
class Collection(object):
""" """
Collection of objects. def __init__(self, split_path, backend=None, title=None):
Should provide a way to be filled, either by providing the children self.split_path = split_path
right away, or a function. The function will be called once with the id
as an argument if there were no children provided, but only on demand.
It can be found in a list of objects, it indicantes a "folder"
you can hop into.
id and title should be unicode.
"""
children = Children()
backend = None
def __init__(self, _id=None, title=None, children=None, fct=None):
self.id = _id
self.title = title self.title = title
# It does not make sense to have both at init _id = split_path[-1] if len(split_path) else None
assert not (fct is not None and children is not None) CapBaseObject.__init__(self, _id, backend)
self._children = children
self._fct = fct
def __iter__(self):
return iter(self.children)
def __unicode__(self): def __unicode__(self):
if self.title and self.id: if self.title and self.id:
@ -78,19 +54,19 @@ class Collection(object):
class ICapCollection(IBaseCap): class ICapCollection(IBaseCap):
def _flatten_resources(self, resources, clean_only=False): def iter_resources_flat(self, objs, split_path, clean_only=False):
""" """
Expand all collections in a list Call iter_resources() to fetch all resources in the tree.
If clean_only is True, do not expand collections, only remove them. If clean_only is True, do not explore paths, only remove them.
split_path is used to set the starting path.
""" """
lst = list() for resource in self.iter_resources(objs, split_path):
for resource in resources: if isinstance(resource, Collection):
if isinstance(resource, (list, Collection)):
if not clean_only: if not clean_only:
lst.extend(self._flatten_resources(resource)) for res in self.iter_resources_flat(objs, resource.split_path):
yield res
else: else:
lst.append(resource) yield resource
return lst
def iter_resources(self, objs, split_path): def iter_resources(self, objs, split_path):
""" """

View file

@ -30,7 +30,7 @@ from weboob.capabilities.base import FieldNotFound, CapBaseObject
from weboob.core import CallErrors from weboob.core import CallErrors
from weboob.tools.application.formatters.iformatter import MandatoryFieldsNotFound from weboob.tools.application.formatters.iformatter import MandatoryFieldsNotFound
from weboob.tools.misc import to_unicode from weboob.tools.misc import to_unicode
from weboob.tools.path import Path from weboob.tools.path import WorkingPath
from weboob.tools.ordereddict import OrderedDict from weboob.tools.ordereddict import OrderedDict
from weboob.capabilities.collection import Collection, ICapCollection, CollectionNotFound from weboob.capabilities.collection import Collection, ICapCollection, CollectionNotFound
@ -137,23 +137,22 @@ class ReplApplication(Cmd, ConsoleApplication):
self._interactive = False self._interactive = False
self.objects = [] self.objects = []
self.collections = [] self.collections = []
self.working_path = Path() self.working_path = WorkingPath()
@property @property
def interactive(self): def interactive(self):
return self._interactive return self._interactive
def _change_prompt(self): def _change_prompt(self):
path = self.working_path.tostring() if len(self.working_path.get()):
if len(path) > 0 and path != '/': self.prompt = u'%s:%s> ' % (self.APPNAME, unicode(self.working_path))
self.prompt = '%s:%s> ' % (self.APPNAME, path)
else: else:
self.prompt = '%s> ' % (self.APPNAME) self.prompt = u'%s> ' % (self.APPNAME)
self.objects = [] self.objects = []
self.collections = [] self.collections = []
def change_path(self, path): def change_path(self, split_path):
self.working_path.fromstring(path) self.working_path.location(split_path)
self._change_prompt() self._change_prompt()
def add_object(self, obj): def add_object(self, obj):
@ -885,16 +884,19 @@ class ReplApplication(Cmd, ConsoleApplication):
cd [PATH] cd [PATH]
Follow a path. Follow a path.
If empty, return home. ".." is a special case and goes up one directory.
"" is a special case and goes home.
""" """
if not len(line.strip()): if not len(line.strip()):
self.working_path.home() self.working_path.home()
elif line.strip() == '..':
self.working_path.up()
else: else:
self.working_path.extend(line) self.working_path.cd1(line)
objects, collections = self._fetch_objects(objs=self.COLLECTION_OBJECTS) objects, collections = self._fetch_objects(objs=self.COLLECTION_OBJECTS)
if len(objects) + len(collections) == 0: if len(objects) + len(collections) == 0:
print >>sys.stderr, "Path: %s not found" % self.working_path.tostring() print >>sys.stderr, u"Path: %s not found" % unicode(self.working_path)
self.working_path.restore() self.working_path.restore()
return 1 return 1

View file

@ -16,64 +16,80 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib from copy import copy
import posixpath from posixpath import sep, join
import copy
class Path(object):
class WorkingPath(object):
def __init__(self): def __init__(self):
self._working_path = [] self.split_path = []
self._previous = self._working_path self.previous = copy(self.split_path)
def cd1(self, user_input):
def extend(self, user_input):
""" """
Add a new part to the current path Append *one* level to the current path.
This means that separators (/) will get escaped.
""" """
split_path = self.get()
split_path.append(user_input)
self.location(split_path)
user_input = urllib.quote_plus(user_input) def location(self, split_path):
user_input = posixpath.normpath(user_input) """
Go to a new path, and store the previous path.
escape = lambda s: s.replace('/', '%2F') """
current_path = map(escape, self._working_path) self.previous = self.get()
self.split_path = split_path
abspath = posixpath.normpath(posixpath.join('/' + '/'.join(current_path), user_input))
abspath = abspath.split('/')[1:]
while len(abspath) > 0 and abspath[0] == u'': del abspath[0]
final_parse = map(urllib.unquote_plus, abspath)
self._previous = self._working_path
if len(final_parse) == 0:
self._working_path = []
self._working_path = final_parse
def restore(self): def restore(self):
""" """
Go to the previous path Go to the previous path
""" """
self._working_path = self._previous self.split_path, self.previous = self.previous, self.split_path
def home(self): def home(self):
""" """
Go to the root Go to the root
""" """
self._previous = self._working_path self.location([])
self._working_path = []
def up(self):
"""
Go up one directory
"""
self.location(self.split_path[:-1])
def get(self): def get(self):
return copy.copy(self._working_path) """
Get the current working path
"""
return copy(self.split_path)
def fromstring(self, path): def __unicode__(self):
if path[0] == '/': return join(sep, *[s.replace(u'/', u'\/') for s in self.split_path])
path = path[1:]
escape = lambda s: s.replace('\/', '/')
self._working_path = map(escape, path.split('/'))
def tostring(self):
escape = lambda s: s.replace('/', '\/') def test():
path = map(escape, self._working_path) wp = WorkingPath()
return '/' + '/'.join(path) assert wp.get() == []
assert unicode(wp) == u'/'
wp.cd1(u'lol')
assert wp.get() == [u'lol']
assert unicode(wp) == u'/lol'
wp.cd1(u'cat')
assert wp.get() == [u'lol', u'cat']
assert unicode(wp) == u'/lol/cat'
wp.restore()
assert unicode(wp) == u'/lol'
wp.home()
assert wp.get() == []
assert unicode(wp) == u'/'
wp.up()
assert wp.get() == []
assert unicode(wp) == u'/'
wp.location(['aa / aa', 'bbbb'])
assert unicode(wp) == u'/aa \/ aa/bbbb'
wp.up()
assert unicode(wp) == u'/aa \/ aa'
wp.cd1(u'héhé/hé')
assert unicode(wp) == u'/aa \/ aa/héhé\/hé'