add script contrib/downloadboob (closes #838)

2012-10-27 13:11:51 +02:00 · 2012-10-27 13:11:51 +02:00 · 1032e66df5
commit 1032e66df5
parent a46798e375
4 changed files with 263 additions and 0 deletions
--- a/3
+++ b/3
@ -86,6 +86,9 @@ Erwan Jahier <Erwan.Jahier@imag.fr>
 theo <theocrite@theocrite.org>
        * Ergonomics enhancements.

+Alexandre Flament <alex@al-f.net>
+        * Script contrib/downloadboob
+
 BohwaZ <bohwaz@bohwaz.net>
        * Script contrib/report_accounts.sh

--- a/contrib/downloadboob/README
+++ b/contrib/downloadboob/README
@ -0,0 +1,15 @@
+This script can be used to automatically download videos matching some criteria.
+
+To avoid downloading a video twice, each video is stored under a unique path: .files/<backend name>/<video.id>.avi
+
+For each entry in the configuration file, the script:
+- checks for new videos
+- downloads the new videos
+- creates a link from <user specify name>/<video.name>.avi to .files/<backend name>/<video id>.avi
+
+In each section of the configuration file :
+- backend : the backend to use
+- pattern : specify the search pattern
+- title_exclude : a list of items separated by '|'. If an item in this list is a substring of the title, then the video is ignored.
+- max_results : maximum number of results to parse
+- directory : the <user specify name> above.
--- a/contrib/downloadboob/downloadboob.conf
+++ b/contrib/downloadboob/downloadboob.conf
@ -0,0 +1,16 @@
+[main]
+directory=~/Téléchargements/podcasts
+
+[zapping]
+backend=canalplus
+max_results=10
+pattern=zapping
+title_exclude=semaine
+directory=Le zapping
+
+[guignol]
+backend=canalplus
+max_results=10
+pattern=les guignols de l'info
+title_exclude=la semaine
+directory=Les guignols de l'info
--- a/contrib/downloadboob/downloadboob.py
+++ b/contrib/downloadboob/downloadboob.py
@ -0,0 +1,229 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Alexandre Flament
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import string
+import subprocess
+import sys
+import os
+import re
+
+import ConfigParser
+
+from weboob.core import Weboob
+from weboob.capabilities.video import ICapVideo
+
+# hack to workaround bash redirection and encoding problem
+import sys, codecs, locale
+
+# When stdout reports no encoding, wrap it in a stream writer so that the
+# Unicode text printed by this script is encoded with the locale's default
+# character set.
+if sys.stdout.encoding is None:
+    # getdefaultlocale() returns a (language code, encoding) pair; the
+    # encoding is checked against None before use.
+    (lang, enc) = locale.getdefaultlocale()
+    if enc is not None:
+        # The codec entry unpacks as
+        # (encoder, decoder, stream reader class, stream writer class).
+        (e, d, sr, sw) = codecs.lookup(enc)
+        # sw will encode Unicode data to the locale-specific character set.
+        sys.stdout = sw(sys.stdout)
+
+# end of hack
+
+def removeNonAscii(s): return "".join(i for i in s if ord(i)<128)
+
+rx = re.compile(u'[ \\/\\?\\:\\>\\<\\!\\\\\\*]+', re.UNICODE)
+def removeSpecial(s):
+    return rx.sub(u' ', u'%s' % s)
+
+DOWNLOAD_DIRECTORY=".files"
+
+class Downloadboob:
+
+    def __init__(self, backend_name, download_directory, links_directory):
+        self.download_directory = download_directory
+        self.links_directory = links_directory
+        self.backend_name = backend_name
+        self.backend = None
+        self.weboob = Weboob()
+        self.weboob.load_backends(modules=[self.backend_name])
+        self.backend=self.weboob.get_backend(self.backend_name)
+
+    def purge(self):
+        if not os.path.isdir(self.links_directory):
+            return
+        dirList=os.listdir(self.links_directory)
+        for local_link_name in dirList:
+            link_name = self.links_directory + "/" + local_link_name
+            if not self.check_link(link_name):
+                print u"Remove %s" % link_name
+                os.remove(link_name)
+            else:
+                print u"Keep %s" % link_name
+
+    def check_link(self, link_name):
+        if os.path.islink(link_name):
+            file_name = os.readlink(link_name)
+            absolute_file_name = os.path.join(self.links_directory, file_name)
+            if os.path.isfile(absolute_file_name):
+                return True
+            return False
+        else:
+            return True
+
+    def download(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None, title_exclude=[]):
+
+        print "For backend %s, search for '%s'" % (backend_name, pattern)
+
+        # create directory for links
+        print "  create link to %s" % self.links_directory
+        if not os.path.isdir(self.links_directory):
+            os.makedirs(self.links_directory)
+
+        # search for videos
+        count = 0
+        videos = []
+        l = list(self.backend.search_videos(pattern, sortby, nsfw, max_results))
+        for video in l:
+            if not self.is_downloaded(video):
+                self.backend.fill_video(video, ('url','title', 'url', 'duration'))
+                if not(self.is_excluded(video.title, title_exclude)):
+                    print "  %s\n    Id:%s\n    Duration:%s" % (video.title, video.id, video.duration)
+                    videos.append(video)
+            else:
+                print "Already downloaded, check %s" % video.id
+                self.backend.fill_video(video, ('url','title', 'url', 'duration'))
+                self.set_linkname(video)
+
+            count=count+1
+            if count == max_results:
+                break
+
+        # download videos
+        print "Downloading..."
+        for video in videos:
+            self.do_download(video)
+
+    def is_excluded(self, title, title_exclude):
+        for exclude in title_exclude:
+            if title.find(exclude) > -1:
+                return True
+        return False
+
+    def get_filename(self, video, relative=False):
+        if relative:
+            directory = os.path.join("..", DOWNLOAD_DIRECTORY, self.backend_name)
+        else:
+            directory = os.path.join(self.download_directory, self.backend_name)
+
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+        ext = video.ext
+        if not ext:
+            ext = 'avi'
+
+        return u"%s/%s.%s" % (directory, removeNonAscii(video.id), ext)
+
+
+    def get_linkname(self, video):
+        if not os.path.exists(self.links_directory):
+            os.makedirs(self.links_directory)
+
+        ext = video.ext
+        if not ext:
+            ext = 'avi'
+
+        misc = video.date
+        if not misc:
+            misc = video.id
+
+        return u"%s/%s (%s).%s" % (self.links_directory, removeSpecial(video.title), removeSpecial(misc), ext)
+
+
+    def is_downloaded(self, video):
+        # check if the file is 0 byte
+        return os.path.isfile(self.get_filename(video))
+
+
+    def set_linkname(self, video):
+        linkname = self.get_linkname(video)
+        idname = self.get_filename(video, relative=True)
+        absolute_idname = self.get_filename(video, relative=False)
+        if not os.path.islink(linkname) and os.path.isfile(absolute_idname):
+            print "%s -> %s" % (linkname, idname)
+            os.symlink(idname, linkname)
+
+
+    def do_download(self, video):
+        if not video:
+            print >>sys.stderr, 'Video not found: %s' %  _id
+            return 3
+
+        if not video.url:
+            print >>sys.stderr, 'Error: the direct URL is not available.'
+            return 4
+
+        def check_exec(executable):
+            with open('/dev/null', 'w') as devnull:
+                process = subprocess.Popen(['which', executable], stdout=devnull)
+                if process.wait() != 0:
+                    print >>sys.stderr, 'Please install "%s"' % executable
+                    return False
+            return True
+
+        dest = self.get_filename(video)
+
+        if video.url.startswith('rtmp'):
+            if not check_exec('rtmpdump'):
+                return 1
+            args = ('rtmpdump', '-r', video.url, '-o', dest)
+        elif video.url.startswith('mms'):
+            if not check_exec('mimms'):
+                return 1
+            args = ('mimms', video.url, dest)
+        else:
+            if not check_exec('wget'):
+                return 1
+            args = ('wget', video.url, '-O', dest)
+
+        os.spawnlp(os.P_WAIT, args[0], *args)
+
+
+config = ConfigParser.ConfigParser()
+config.read(['/etc/downloadboob.conf', os.path.expanduser('~/downloadboob.conf'), 'downloadboob.conf'])
+
+links_directory=os.path.expanduser(config.get('main','directory', '.'))
+links_directory=links_directory.decode('utf-8')
+
+download_directory=os.path.join(links_directory, DOWNLOAD_DIRECTORY)
+
+print "Downloading to %s" % (links_directory)
+
+for section in config.sections():
+    if section != "main":
+        backend_name=config.get(section, "backend")
+        pattern=config.get(section, "pattern")
+        if config.has_option(section, "title_exclude"):
+            title_exclude=config.get(section, "title_exclude").split('|')
+        else:
+            title_exclude=[]
+        max_result=config.getint(section, "max_results")
+        section_sublinks_directory=config.get(section,"directory")
+        section_links_directory=os.path.join(links_directory, section_sublinks_directory)
+
+        downloadboob = Downloadboob(backend_name, download_directory, section_links_directory)
+        downloadboob.purge()
+        # FIXME sortBy, title.match
+        downloadboob.download(pattern, ICapVideo.SEARCH_DATE, False, max_result, title_exclude)