Make Collection more safe and sane

* Remove callbacks in the Collection object. Make Collection a "dumb" object (and also a base object, though it isn't very useful for now).
* Rename Path to WorkingPath, because it is more about managing state than being a single path.
* Rewrite almost all of WorkingPath, because the code was overly complicated for no reason (I tried some special cases and it turned out that fromstring didn't handle them, and that the quote-escape-unquote was just unnecessary). I also rewrote it to be more pythonic (no more lambdas and maps) and added tests.
* Require the full split path when creating a Collection. Because, come to think of it, an object needs a unique identifier; in the case of Collections, it is the full path, not only its last part. I might even replace the id with the full split path in the future.
* There is now only one way to get items of a Collection: calling iter_resources().
* Rewrite flatten_resources to iter_resources_flat(), which just calls iter_resources() recursively.
* Rewrite the collection part of the canalplus module. There are no more callbacks, and no page calling the browser to check another page! The logic is only in iter_resources(). The resulting code is not very pretty, but it should get better. As a bonus, avoid reloading the main XML file when we already have it open.
* change_path() now expects a split path and not a string.
* up/home special cases for "cd" are handled in the same place, and store the previous place properly (but are not yet exploitable by a user command).

This is a big commit but it would be hard to split it into *working* commits. If you read this entire commit message, I will buy you a beer.

refs #774
fixes #773
2012-03-08 00:33:49 +01:00 · 2012-03-08 00:33:49 +01:00 · b4b7182960
commit b4b7182960
parent 1dd26e5ffe
13 changed files with 147 additions and 148 deletions
--- a/modules/canalplus/browser.py
+++ b/modules/canalplus/browser.py
@@ -28,13 +28,13 @@ from weboob.tools.browser.decorators import id2url
 from .pages import InitPage, VideoPage
 from .video import CanalplusVideo

-from weboob.capabilities.collection import Collection, CollectionNotFound
+from weboob.capabilities.collection import CollectionNotFound

 __all__ = ['CanalplusBrowser']


 class XMLParser(object):
-     def parse(self, data, encoding=None):
+    def parse(self, data, encoding=None):
        if encoding is None:
            parser = None
        else:
@@ -60,11 +60,8 @@ class CanalplusBrowser(BaseBrowser):
        }

    def __init__(self, quality, *args, **kwargs):
-        BaseBrowser.__init__(self, parser= self.PARSER, *args, **kwargs)
-        if quality in self.FORMATS:
-            self.quality = self.FORMATS[quality]
-        else:
-            self.quality = 'HD'
+        BaseBrowser.__init__(self, parser=self.PARSER, *args, **kwargs)
+        self.quality = self.FORMATS.get(quality, self.FORMATS['hd'])

    def home(self):
        self.location('http://service.canal-plus.com/video/rest/initPlayer/cplus/')
@@ -79,19 +76,33 @@ class CanalplusBrowser(BaseBrowser):
        return self.page.get_video(video, self.quality)

    def iter_resources(self, split_path):
-        self.home()
-        collections = self.page.collections
+        if not self.is_on_page(InitPage):
+            self.home()
+        channels = self.page.get_channels()

-        def walk_res(path, collections):
-            if len(path) == 0 or not isinstance(collections, (list, Collection)):
-                return collections
-            i = path[0]
-            matches = [collection
-                        for collection in collections
-                        if collection.id == i or collection.title == i]
-            if not len(matches):
-                raise CollectionNotFound(path)
+        if len(split_path) == 0:
+            for channel in channels:
+                if len(channel.split_path) == 1:
+                    yield channel
+        elif len(split_path) == 1:
+            for channel in channels:
+                if len(channel.split_path) == 2 and split_path[0] == channel.split_path[0]:
+                        yield channel
+        elif len(split_path) == 2:
+            subchannels = self.iter_resources(split_path[0:1])
+            channel = None
+            for subchannel in subchannels:
+                # allow matching by title for backward compatibility (for now)
+                if split_path[0] == subchannel.split_path[0] and \
+                    split_path[1] in (subchannel.split_path[1], subchannel.title):
+                        channel = subchannel
+            if channel:
+                self.location("http://service.canal-plus.com/video/rest/getMEAs/cplus/%s" % channel.id)
+                assert self.is_on_page(VideoPage)
+                for video in self.page.iter_channel():
+                    yield video
+            else:
+                raise CollectionNotFound(split_path)

-            return walk_res(path[1:], matches[0])
-
-        return walk_res(split_path, collections)
+        else:
+            raise CollectionNotFound(split_path)
--- a/modules/canalplus/pages/initpage.py
+++ b/modules/canalplus/pages/initpage.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2010-2011 Nicolas Duhamel
+# Copyright(C) 2010-2012 Nicolas Duhamel, Laurent Bachelier
 #
 # This file is part of weboob.
 #
@@ -26,24 +26,19 @@ __all__ = ['InitPage']


 class InitPage(BasePage):
-    def on_loaded(self):
-        self.collections = []
-
-        def do(_id):
-            self.browser.location("http://service.canal-plus.com/video/rest/getMEAs/cplus/%s" % _id)
-            return self.browser.page.iter_channel()
-
-        # Parse the list of channels
+    def get_channels(self):
+        """
+        Extract all possible channels (paths) from the page
+        """
+        channels = list()
        for elem in self.document[2].getchildren():
-            children = []
            for e in elem.getchildren():
                if e.tag == "NOM":
-                    _id = e.text.strip()
+                    name = e.text.strip()
+                    channels.append(Collection([name]))
                elif e.tag == "SELECTIONS":
                    for select in e:
-                        sub = Collection(_id=select[0].text,
-                                title=select[1].text.strip(),
-                                fct=do)
-                        children.append(sub)
-            coll = Collection(_id, children=children)
-            self.collections.append(coll)
+                        sub = Collection([name, select[0].text],
+                                title=select[1].text.strip())
+                        channels.append(sub)
+        return channels
--- a/modules/radiofrance/backend.py
+++ b/modules/radiofrance/backend.py
@@ -104,7 +104,7 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo):

    def iter_resources(self, objs, split_path):
        if Radio in objs:
-            if len(split_path) == 1 and split_path[0] == 'francebleu':
+            if split_path == [u'francebleu']:
                for _id in sorted(self._RADIOS.iterkeys()):
                    if _id.startswith('fb'):
                        yield self.get_radio(_id)
@@ -112,13 +112,12 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo):
                for _id in sorted(self._RADIOS.iterkeys()):
                    if not _id.startswith('fb'):
                        yield self.get_radio(_id)
-                yield Collection('francebleu', 'France Bleu',
-                        children=self.iter_resources(objs, ['francebleu']))
+                yield Collection(['francebleu'], 'France Bleu')
            else:
                raise CollectionNotFound(split_path)

    def iter_radios_search(self, pattern):
-        for radio in self._flatten_resources(self.iter_resources((Radio, ), [])):
+        for radio in self.iter_resources_flat((Radio, ), []):
            if pattern.lower() in radio.title.lower() or pattern.lower() in radio.description.lower():
                yield radio

--- a/modules/redmine/backend.py
+++ b/modules/redmine/backend.py
@@ -97,7 +97,7 @@ class RedmineBackend(BaseBackend, ICapContent, ICapBugTracker, ICapCollection):
    def iter_resources(self, objs, split_path):
        if Project in objs or Issue in objs:
            if len(split_path) == 0:
-                return [Collection(project.id, project.name, fct=self.iter_issues)
+                return [Collection([project.id], project.name)
                        for project in self.iter_projects()]

            if len(split_path) == 1:
--- a/weboob/applications/boobtracker/boobtracker.py
+++ b/weboob/applications/boobtracker/boobtracker.py
@@ -136,7 +136,7 @@ class BoobTracker(ReplApplication):
        query.category = self.options.category
        query.status = self.options.status

-        self.change_path('/%s/search' % query.project)
+        self.change_path([query.project, u'search'])
        for backend, issue in self.do('iter_issues', query, backends=backends):
            self.add_object(issue)
            self.format(issue)
--- a/weboob/applications/flatboob/flatboob.py
+++ b/weboob/applications/flatboob/flatboob.py
@@ -147,7 +147,7 @@ class Flatboob(ReplApplication):
        query.cost_max = self.ask_int('Enter max cost')
        query.nb_rooms = self.ask_int('Enter number of rooms')

-        self.change_path('/housings')
+        self.change_path([u'housings'])
        for backend, housing in self.do('search_housings', query):
            self.add_object(housing)
            self.format(housing)
--- a/weboob/applications/radioob/radioob.py
+++ b/weboob/applications/radioob/radioob.py
@@ -135,7 +135,7 @@ class Radioob(ReplApplication):
        If PATTERN is not given, this command will list all the radios.
        """
        self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'All radios')
-        self.change_path('/search')
+        self.change_path([u'search'])
        for backend, radio in self.do('iter_radios_search', pattern=pattern):
            self.add_object(radio)
            self.format(radio)
--- a/weboob/applications/videoob/videoob.py
+++ b/weboob/applications/videoob/videoob.py
@@ -223,7 +223,7 @@ class Videoob(ReplApplication):
            return 1

        self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos')
-        self.change_path('/search')
+        self.change_path([u'search'])
        for backend, video in self.do('search_videos', pattern=pattern, nsfw=self.nsfw,
                                      max_results=self.options.count):
            self.add_object(video)
--- a/weboob/applications/weboorrents/weboorrents.py
+++ b/weboob/applications/weboorrents/weboorrents.py
@@ -175,7 +175,7 @@ class Weboorrents(ReplApplication):

        Search torrents.
        """
-        self.change_path('/search')
+        self.change_path([u'search'])
        if not pattern:
            pattern = None
        self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest torrents')
--- a/weboob/applications/wetboobs/wetboobs.py
+++ b/weboob/applications/wetboobs/wetboobs.py
@@ -96,7 +96,7 @@ class WetBoobs(ReplApplication):

        Search cities.
        """
-        self.change_path('/cities')
+        self.change_path(['cities'])
        for backend, city in self.do('iter_city_search', pattern, caps=ICapWeather):
            self.add_object(city)
            self.format(city)
@@ -143,7 +143,7 @@ class WetBoobs(ReplApplication):

        List all rivers. If PATTERN is specified, search on a pattern.
        """
-        self.change_path('/gauges')
+        self.change_path([u'gauges'])
        for backend, gauge in self.do('iter_gauges', pattern or None, caps=ICapWaterLevel):
            self.add_object(gauge)
            self.format(gauge)
--- a/weboob/capabilities/collection.py
+++ b/weboob/capabilities/collection.py
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

-from .base import IBaseCap
+from .base import IBaseCap, CapBaseObject

 __all__ = ['ICapCollection', 'Collection', 'CollectionNotFound']

@@ -31,42 +31,18 @@ class CollectionNotFound(Exception):
        Exception.__init__(self, msg)


-class Children(object):
+class Collection(CapBaseObject):
    """
-    Dynamic property of a Collection.
-    Returns a list, either by calling a function or because
-    it already has the list.
-    """
-    def __get__(self, obj, type=None):
-        if obj._children is None:
-            if callable(obj._fct):
-                obj._children = obj._fct(obj.id)
-        return obj._children or []
+    A Collection is a "fake" object returned in results, which shows you can get
+    more results if you go into its path.

-
-class Collection(object):
+    It is a dumb object, it must not contain callbacks to a backend.
    """
-    Collection of objects.
-    Should provide a way to be filled, either by providing the children
-    right away, or a function. The function will be called once with the id
-    as an argument if there were no children provided, but only on demand.
-    It can be found in a list of objects, it indicantes a "folder"
-    you can hop into.
-    id and title should be unicode.
-    """
-    children = Children()
-    backend = None
-
-    def __init__(self, _id=None, title=None, children=None, fct=None):
-        self.id = _id
+    def __init__(self, split_path, backend=None, title=None):
+        self.split_path = split_path
        self.title = title
-        # It does not make sense to have both at init
-        assert not (fct is not None and children is not None)
-        self._children = children
-        self._fct = fct
-
-    def __iter__(self):
-        return iter(self.children)
+        _id = split_path[-1] if len(split_path) else None
+        CapBaseObject.__init__(self, _id, backend)

    def __unicode__(self):
        if self.title and self.id:
@@ -78,19 +54,19 @@ class Collection(object):


 class ICapCollection(IBaseCap):
-    def _flatten_resources(self, resources, clean_only=False):
+    def iter_resources_flat(self, objs, split_path, clean_only=False):
        """
-        Expand all collections in a list
-        If clean_only is True, do not expand collections, only remove them.
+        Call iter_resources() to fetch all resources in the tree.
+        If clean_only is True, do not explore paths, only remove them.
+        split_path is used to set the starting path.
        """
-        lst = list()
-        for resource in resources:
-            if isinstance(resource, (list, Collection)):
+        for resource in self.iter_resources(objs, split_path):
+            if isinstance(resource, Collection):
                if not clean_only:
-                    lst.extend(self._flatten_resources(resource))
+                    for res in self.iter_resources_flat(objs, resource.split_path):
+                        yield res
            else:
-                lst.append(resource)
-        return lst
+                yield resource

    def iter_resources(self, objs, split_path):
        """
--- a/weboob/tools/application/repl.py
+++ b/weboob/tools/application/repl.py
@@ -30,7 +30,7 @@ from weboob.capabilities.base import FieldNotFound, CapBaseObject
 from weboob.core import CallErrors
 from weboob.tools.application.formatters.iformatter import MandatoryFieldsNotFound
 from weboob.tools.misc import to_unicode
-from weboob.tools.path import Path
+from weboob.tools.path import WorkingPath
 from weboob.tools.ordereddict import OrderedDict
 from weboob.capabilities.collection import Collection, ICapCollection, CollectionNotFound

@@ -137,23 +137,22 @@ class ReplApplication(Cmd, ConsoleApplication):
        self._interactive = False
        self.objects = []
        self.collections = []
-        self.working_path = Path()
+        self.working_path = WorkingPath()

    @property
    def interactive(self):
        return self._interactive

    def _change_prompt(self):
-        path = self.working_path.tostring()
-        if len(path) > 0 and path != '/':
-            self.prompt = '%s:%s> ' % (self.APPNAME, path)
+        if len(self.working_path.get()):
+            self.prompt = u'%s:%s> ' % (self.APPNAME, unicode(self.working_path))
        else:
-            self.prompt = '%s> ' % (self.APPNAME)
+            self.prompt = u'%s> ' % (self.APPNAME)
        self.objects = []
        self.collections = []

-    def change_path(self, path):
-        self.working_path.fromstring(path)
+    def change_path(self, split_path):
+        self.working_path.location(split_path)
        self._change_prompt()

    def add_object(self, obj):
@@ -885,16 +884,19 @@ class ReplApplication(Cmd, ConsoleApplication):
        cd [PATH]

        Follow a path.
-        If empty, return home.
+        ".." is a special case and goes up one directory.
+        "" is a special case and goes home.
        """
        if not len(line.strip()):
            self.working_path.home()
+        elif line.strip() == '..':
+            self.working_path.up()
        else:
-            self.working_path.extend(line)
+            self.working_path.cd1(line)

        objects, collections = self._fetch_objects(objs=self.COLLECTION_OBJECTS)
        if len(objects) + len(collections) == 0:
-            print >>sys.stderr, "Path: %s not found" % self.working_path.tostring()
+            print >>sys.stderr, u"Path: %s not found" % unicode(self.working_path)
            self.working_path.restore()
            return 1

--- a/weboob/tools/path.py
+++ b/weboob/tools/path.py
@@ -16,64 +16,80 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
-import urllib
-import posixpath
-import copy
+from copy import copy
+from posixpath import sep, join

-class Path(object):
+
+class WorkingPath(object):
    def __init__(self):
-        self._working_path = []
-        self._previous = self._working_path
+        self.split_path = []
+        self.previous = copy(self.split_path)

-
-    def extend(self, user_input):
+    def cd1(self, user_input):
        """
-        Add a new part to the current path
+        Append *one* level to the current path.
+        This means that separators (/) will get escaped.
        """
+        split_path = self.get()
+        split_path.append(user_input)
+        self.location(split_path)

-        user_input = urllib.quote_plus(user_input)
-        user_input = posixpath.normpath(user_input)
-
-        escape = lambda s: s.replace('/', '%2F')
-        current_path = map(escape, self._working_path)
-
-        abspath =  posixpath.normpath(posixpath.join('/' + '/'.join(current_path), user_input))
-
-        abspath = abspath.split('/')[1:]
-        while len(abspath) > 0 and abspath[0] == u'': del abspath[0]
-
-        final_parse = map(urllib.unquote_plus, abspath)
-
-        self._previous = self._working_path
-
-        if len(final_parse) == 0:
-            self._working_path = []
-
-        self._working_path = final_parse
+    def location(self, split_path):
+        """
+        Go to a new path, and store the previous path.
+        """
+        self.previous = self.get()
+        self.split_path = split_path

    def restore(self):
        """
        Go to the previous path
        """
-        self._working_path = self._previous
+        self.split_path, self.previous = self.previous, self.split_path

    def home(self):
        """
        Go to the root
        """
-        self._previous = self._working_path
-        self._working_path = []
+        self.location([])
+
+    def up(self):
+        """
+        Go up one directory
+        """
+        self.location(self.split_path[:-1])

    def get(self):
-        return copy.copy(self._working_path)
+        """
+        Get the current working path
+        """
+        return copy(self.split_path)

-    def fromstring(self, path):
-        if path[0] == '/':
-            path = path[1:]
-        escape = lambda s: s.replace('\/', '/')
-        self._working_path = map(escape, path.split('/'))
+    def __unicode__(self):
+        return join(sep, *[s.replace(u'/', u'\/') for s in self.split_path])

-    def tostring(self):
-        escape = lambda s: s.replace('/', '\/')
-        path = map(escape, self._working_path)
-        return '/' + '/'.join(path)
+
+def test():
+    wp = WorkingPath()
+    assert wp.get() == []
+    assert unicode(wp) == u'/'
+    wp.cd1(u'lol')
+    assert wp.get() == [u'lol']
+    assert unicode(wp) == u'/lol'
+    wp.cd1(u'cat')
+    assert wp.get() == [u'lol', u'cat']
+    assert unicode(wp) == u'/lol/cat'
+    wp.restore()
+    assert unicode(wp) == u'/lol'
+    wp.home()
+    assert wp.get() == []
+    assert unicode(wp) == u'/'
+    wp.up()
+    assert wp.get() == []
+    assert unicode(wp) == u'/'
+    wp.location(['aa / aa', 'bbbb'])
+    assert unicode(wp) == u'/aa \/ aa/bbbb'
+    wp.up()
+    assert unicode(wp) == u'/aa \/ aa'
+    wp.cd1(u'héhé/hé')
+    assert unicode(wp) == u'/aa \/ aa/héhé\/hé'