boobot: Display regular URL info

This commit is contained in:
Laurent Bachelier 2013-04-20 02:01:50 +02:00
commit 99eaa87db9

View file

@ -23,10 +23,17 @@ import logging
import re import re
import sys import sys
from threading import Thread, Event from threading import Thread, Event
from math import log
from irc.bot import SingleServerIRCBot from irc.bot import SingleServerIRCBot
import mechanize
from mechanize import _headersutil as headersutil
import lxml.html
from weboob.core import Weboob from weboob.core import Weboob
from weboob.tools.browser import StandardBrowser, BrowserUnavailable
from weboob.tools.misc import get_backtrace from weboob.tools.misc import get_backtrace
from weboob.tools.misc import to_unicode
from weboob.tools.storage import StandardStorage from weboob.tools.storage import StandardStorage
IRC_CHANNEL = '#weboob' IRC_CHANNEL = '#weboob'
@ -35,6 +42,39 @@ IRC_SERVER = 'chat.freenode.net'
STORAGE_FILE = 'boobot.storage' STORAGE_FILE = 'boobot.storage'
class HeadRequest(mechanize.Request):
    """A mechanize request whose reported HTTP method is HEAD."""

    def get_method(self):
        """Return the HTTP method name for this request: always HEAD."""
        return "HEAD"
class BoobotBrowser(StandardBrowser):
def urlinfo(self, url):
    """
    Collect displayable information about the resource at ``url``.

    A HEAD request is issued first to read the Content-Type and
    Content-Length headers. If ``headersutil.is_html`` reports the
    resource as HTML, the page is fetched with ``self.readurl`` and the
    text of its ``<head><title>`` element is extracted.

    :param url: URL to inspect
    :returns: a ``(content_type, hsize, title)`` tuple where
        ``content_type`` is the raw Content-Type header value (or None),
        ``hsize`` is the result of ``human_size`` (or None when no usable
        Content-Length was sent) and ``title`` is the title text passed
        through ``to_unicode`` (or None when there is no title)
    """
    # NOTE(review): the HEAD request goes through a fresh StandardBrowser
    # while the body is fetched through self; kept as in the original.
    head_browser = StandardBrowser()
    response = head_browser.openurl(HeadRequest(url))
    headers = response.info()
    content_type = headers.get('Content-Type')

    try:
        size = int(headers.get('Content-Length'))
    except (TypeError, ValueError):
        # TypeError: header absent (None).
        # ValueError: header present but not an integer.
        size = None
    # A negative length is meaningless; treat it like a missing header
    # instead of letting the size formatting fail.
    if size is None or size < 0:
        hsize = None
    else:
        hsize = self.human_size(size)

    # Only pass real header values; with no Content-Type, is_html is
    # presumably left to guess from the URL -- confirm against mechanize.
    ct_headers = [content_type] if content_type else []
    is_html = headersutil.is_html(ct_headers, url, True)

    title = None
    if is_html:
        document = lxml.html.fromstring(self.readurl(url))
        # If several <title> elements exist, the last one wins.
        for node in document.xpath('//head/title'):
            title = to_unicode(node.text_content())
    return content_type, hsize, title
def human_size(self, size):
    """
    Format a byte count using binary (1024-based) units.

    :param size: number of bytes; a falsy value (0 or None) gives '0 B'
    :returns: a string such as '1.5 KiB', with one decimal place
    :raises ValueError: if size is negative
    """
    if not size:
        return '0 B'
    if size < 0:
        raise ValueError('size must be non-negative, got %r' % (size,))

    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    value = float(size)
    exponent = 0
    # Repeated division avoids depending on floating-point log() rounding,
    # and capping at the last unit keeps huge sizes from indexing past the
    # end of the units tuple.
    while value >= 1024 and exponent < len(units) - 1:
        value /= 1024
        exponent += 1
    return "%.1f %s" % (value, units[exponent])
class MyThread(Thread): class MyThread(Thread):
def __init__(self, bot): def __init__(self, bot):
Thread.__init__(self) Thread.__init__(self)
@ -111,19 +151,36 @@ class TestBot(SingleServerIRCBot):
else: else:
text = ' '.join(event.arguments) text = ' '.join(event.arguments)
for m in re.findall('([\w\d_\-]+@\w+)', text): for m in re.findall('([\w\d_\-]+@\w+)', text):
id, backend_name = m.split('@', 1) self.on_boobid(m)
for m in re.findall('(https?://\S+)', text):
self.on_url(m)
def on_boobid(self, boobid):
    """
    Handle an ``id@backend`` reference found in a message.

    The part after the first '@' selects a loaded backend; if no backend
    of that name is loaded, nothing happens. Otherwise the backend's
    capabilities are scanned and the first one for which this object has
    an ``obj_info_<name>`` method is invoked with the backend and the id.

    :param boobid: string of the form ``<id>@<backend name>``
    """
    _id, backend_name = boobid.split('@', 1)
    backends = self.weboob.backend_instances
    if backend_name not in backends:
        return

    backend = backends[backend_name]
    for cap in backend.iter_caps():
        # The first four characters of the capability class name are
        # dropped (presumably an 'ICap' prefix -- confirm).
        func = 'obj_info_%s' % cap.__name__[4:].lower()
        if not hasattr(self, func):
            continue
        try:
            getattr(self, func)(backend, _id)
        except Exception as e:
            # Report the failure on the channel instead of letting it
            # escape; the full backtrace goes to stdout.
            print(get_backtrace())
            self.send_message('Oops: [%s] %s' % (type(e).__name__, e))
        # Stop after the first capability that has a handler, whether or
        # not the handler succeeded.
        break
def on_url(self, url):
    """
    Announce information about a URL seen in a message.

    The page title is sent when one was found; otherwise the content
    type is sent, together with the human-readable size when available.
    A BrowserUnavailable error is reported as a message instead.

    :param url: the URL to look up
    """
    try:
        content_type, hsize, title = BoobotBrowser().urlinfo(url)
        if title:
            message = u'URL: %s' % title
        elif hsize:
            message = u'URL (file): %s, %s' % (content_type, hsize)
        else:
            message = u'URL (file): %s' % content_type
        self.send_message(message)
    except BrowserUnavailable as e:
        self.send_message(u'URL (error): %s' % e)
def obj_info_video(self, backend, id): def obj_info_video(self, backend, id):
v = backend.get_video(id) v = backend.get_video(id)
if v: if v: