boobot: Fix weird/unicode URLs
http://stackoverflow.com/questions/1916684/cant-open-unicode-url-with-python
This commit is contained in:
parent
2f9a50c754
commit
45151afa3e
1 changed files with 48 additions and 8 deletions
|
|
@ -25,6 +25,8 @@ import os
|
||||||
import sys
|
import sys
|
||||||
from threading import Thread, Event
|
from threading import Thread, Event
|
||||||
from math import log
|
from math import log
|
||||||
|
import urlparse
|
||||||
|
import urllib
|
||||||
|
|
||||||
from irc.bot import SingleServerIRCBot
|
from irc.bot import SingleServerIRCBot
|
||||||
import mechanize
|
import mechanize
|
||||||
|
|
@ -43,6 +45,40 @@ IRC_SERVER = os.getenv('BOOBOT_SERVER', 'chat.freenode.net')
|
||||||
STORAGE_FILE = 'boobot.storage'
|
STORAGE_FILE = 'boobot.storage'
|
||||||
|
|
||||||
|
|
||||||
|
def fixurl(url):
|
||||||
|
# turn string into unicode
|
||||||
|
if not isinstance(url, unicode):
|
||||||
|
url = url.decode('utf8')
|
||||||
|
|
||||||
|
# parse it
|
||||||
|
parsed = urlparse.urlsplit(url)
|
||||||
|
|
||||||
|
# divide the netloc further
|
||||||
|
userpass, at, hostport = parsed.netloc.rpartition('@')
|
||||||
|
user, colon1, pass_ = userpass.partition(':')
|
||||||
|
host, colon2, port = hostport.partition(':')
|
||||||
|
|
||||||
|
# encode each component
|
||||||
|
scheme = parsed.scheme.encode('utf8')
|
||||||
|
user = urllib.quote(user.encode('utf8'))
|
||||||
|
colon1 = colon1.encode('utf8')
|
||||||
|
pass_ = urllib.quote(pass_.encode('utf8'))
|
||||||
|
at = at.encode('utf8')
|
||||||
|
host = host.encode('idna')
|
||||||
|
colon2 = colon2.encode('utf8')
|
||||||
|
port = port.encode('utf8')
|
||||||
|
path = '/'.join( # could be encoded slashes!
|
||||||
|
urllib.quote(urllib.unquote(pce).encode('utf8'), '')
|
||||||
|
for pce in parsed.path.split('/')
|
||||||
|
)
|
||||||
|
query = urllib.quote(urllib.unquote(parsed.query).encode('utf8'), '=&?/')
|
||||||
|
fragment = urllib.quote(urllib.unquote(parsed.fragment).encode('utf8'))
|
||||||
|
|
||||||
|
# put it back together
|
||||||
|
netloc = ''.join((user, colon1, pass_, at, host, colon2, port))
|
||||||
|
return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
|
||||||
|
|
||||||
|
|
||||||
class HeadRequest(mechanize.Request):
|
class HeadRequest(mechanize.Request):
|
||||||
def get_method(self):
|
def get_method(self):
|
||||||
return "HEAD"
|
return "HEAD"
|
||||||
|
|
@ -70,7 +106,8 @@ class BoobotBrowser(StandardBrowser):
|
||||||
|
|
||||||
def human_size(self, size):
|
def human_size(self, size):
|
||||||
if size:
|
if size:
|
||||||
units = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
|
units = ('B', 'KiB', 'MiB', 'GiB',
|
||||||
|
'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
|
||||||
exponent = int(log(size, 1024))
|
exponent = int(log(size, 1024))
|
||||||
return "%.1f %s" % (float(size) / pow(1024, exponent), units[exponent])
|
return "%.1f %s" % (float(size) / pow(1024, exponent), units[exponent])
|
||||||
return '0 B'
|
return '0 B'
|
||||||
|
|
@ -94,7 +131,8 @@ class MyThread(Thread):
|
||||||
self.weboob.loop()
|
self.weboob.loop()
|
||||||
|
|
||||||
def find_keywords(self, text):
|
def find_keywords(self, text):
|
||||||
for word in ['weboob', 'videoob', 'havesex', 'havedate', 'monboob', 'boobmsg',
|
for word in [
|
||||||
|
'weboob', 'videoob', 'havesex', 'havedate', 'monboob', 'boobmsg',
|
||||||
'flatboob', 'boobill', 'pastoob', 'radioob', 'translaboob', 'traveloob',
|
'flatboob', 'boobill', 'pastoob', 'radioob', 'translaboob', 'traveloob',
|
||||||
'boobathon', 'boobank', 'boobtracker', 'comparoob', 'wetboobs',
|
'boobathon', 'boobank', 'boobtracker', 'comparoob', 'wetboobs',
|
||||||
'webcontentedit', 'weboorrents', u'sàt', u'salut à toi', 'ass2m',
|
'webcontentedit', 'weboorrents', u'sàt', u'salut à toi', 'ass2m',
|
||||||
|
|
@ -108,7 +146,8 @@ class MyThread(Thread):
|
||||||
word = self.find_keywords(msg.content)
|
word = self.find_keywords(msg.content)
|
||||||
if word is not None:
|
if word is not None:
|
||||||
url = msg.signature[msg.signature.find('https://linuxfr'):]
|
url = msg.signature[msg.signature.find('https://linuxfr'):]
|
||||||
self.bot.send_message('[DLFP] %s talks about %s: %s' % (msg.sender, word, url))
|
self.bot.send_message('[DLFP] %s talks about %s: %s' % (
|
||||||
|
msg.sender, word, url))
|
||||||
backend.set_message_read(msg)
|
backend.set_message_read(msg)
|
||||||
|
|
||||||
def check_board(self):
|
def check_board(self):
|
||||||
|
|
@ -129,7 +168,7 @@ class MyThread(Thread):
|
||||||
class Boobot(SingleServerIRCBot):
|
class Boobot(SingleServerIRCBot):
|
||||||
def __init__(self, channels, nickname, server, port=6667):
|
def __init__(self, channels, nickname, server, port=6667):
|
||||||
SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
|
SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
|
||||||
#self.connection.add_global_handler('pubmsg', self.on_pubmsg)
|
# self.connection.add_global_handler('pubmsg', self.on_pubmsg)
|
||||||
self.connection.add_global_handler('join', self.on_join)
|
self.connection.add_global_handler('join', self.on_join)
|
||||||
self.connection.add_global_handler('welcome', self.on_welcome)
|
self.connection.add_global_handler('welcome', self.on_welcome)
|
||||||
|
|
||||||
|
|
@ -185,6 +224,7 @@ class Boobot(SingleServerIRCBot):
|
||||||
break
|
break
|
||||||
|
|
||||||
def on_url(self, url):
|
def on_url(self, url):
|
||||||
|
url = fixurl(url)
|
||||||
try:
|
try:
|
||||||
content_type, hsize, title = BoobotBrowser().urlinfo(url)
|
content_type, hsize, title = BoobotBrowser().urlinfo(url)
|
||||||
if title:
|
if title:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue