From c9debbc619a048e7312c01121a7791e523276665 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Tue, 23 Mar 2010 14:19:59 +0100 Subject: [PATCH] fix unicode issues --- weboob/backends/sncf/canaltp.py | 13 +++++---- weboob/frontends/travel/application.py | 18 ++++++------ weboob/tools/misc.py | 39 ++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 15 deletions(-) create mode 100644 weboob/tools/misc.py diff --git a/weboob/backends/sncf/canaltp.py b/weboob/backends/sncf/canaltp.py index 87b31f33..5c4f402a 100644 --- a/weboob/backends/sncf/canaltp.py +++ b/weboob/backends/sncf/canaltp.py @@ -20,6 +20,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from datetime import datetime, date, time from weboob.tools.browser import Browser +from weboob.tools.misc import toUnicode class CanalTP(Browser): DOMAIN = 'widget.canaltp.fr' @@ -37,11 +38,11 @@ class CanalTP(Browser): except ValueError: continue else: - yield _id, name + yield _id, toUnicode(name) def iter_station_departures(self, station_id): result = self.openurl(u"http://widget.canaltp.fr/Prochains_departs_15122009/dev/index.php?gare=%s" % unicode(station_id)).read() - result = unicode(result, "utf8") + result = result departure = '' for line in result.split('&'): key, value = line.split('=', 1) @@ -49,12 +50,12 @@ class CanalTP(Browser): departure = value elif key.startswith('ligne'): _type, unknown, _time, arrival, served, late, late_reason = value.split(';', 6) - yield {'type': _type, + yield {'type': toUnicode(_type), 'time': datetime.combine(date.today(), time(*[int(x) for x in _time.split(':')])), - 'departure': departure, - 'arrival': arrival, + 'departure': toUnicode(departure), + 'arrival': toUnicode(arrival).strip(), 'late': late and time(0, int(late.split()[0])) or time(), - 'late_reason': late_reason} + 'late_reason': toUnicode(late_reason).replace('\n', '').strip()} def home(self): pass diff --git a/weboob/frontends/travel/application.py b/weboob/frontends/travel/application.py index 7161d671..089d09a5 100644 --- a/weboob/frontends/travel/application.py +++ b/weboob/frontends/travel/application.py @@ -78,21 +78,21 @@ class Application(BaseApplication): print ' departures List all departures on a special station' def command_stations(self, pattern): - print ".-----------------.----------------------------------------." - print '| ID | Name |' - print '+-----------------+----------------------------------------+' + print ".--------------------------------.---------------------------------------------." + print '| ID | Name |' + print '+--------------------------------+---------------------------------------------+' count = 0 for name, backend, in self.weboob.iter_backends(): for station in backend.iter_station_search(pattern): - print '| %-15s | %-38s |' % (station.id, station.name) + print '| %-30s | %-43s |' % (station.id, station.name) count += 1 - print "+-----------------'----------------------------------------+" - print "| %3d stations listed |" % count - print "'----------------------------------------------------------'" + print "+--------------------------------'---------------------------------------------+" + print "| %3d stations listed |" % count + print "'------------------------------------------------------------------------------'" def command_departures(self, station): print ".-----.-----------.-------.-----------------------.-------.--------------------." - print "| ID | Type | Time | Arrival | Late | Info |" + print "| ID | Type | Time | Arrival | Late | Info |" print "+-----+-----------+-------+-----------------------+-------+--------------------+" count = 0 for name, backend, in self.weboob.iter_backends(): @@ -102,7 +102,7 @@ class Application(BaseApplication): departure.time.strftime("%H:%M"), departure.arrival_station, departure.late and departure.late.strftime("%H:%M") or '', - departure.information.replace('\n', '').strip()) + departure.information) count += 1 print "+-----'-----------'-------'-----------------------'-------'--------------------+" print "| %3d departures listed |" % count diff --git a/weboob/tools/misc.py b/weboob/tools/misc.py new file mode 100644 index 00000000..e0e7e8b2 --- /dev/null +++ b/weboob/tools/misc.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Romain Bignon + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +""" + +def toUnicode(text): + r""" + >>> toUnicode('ascii') + u'ascii' + >>> toUnicode(u'utf\xe9'.encode('UTF-8')) + u'utf\xe9' + >>> toUnicode(u'unicode') + u'unicode' + """ + if isinstance(text, unicode): + return text + if not isinstance(text, str): + text = str(text) + try: + return unicode(text, "utf8") + except UnicodeError: + pass + return unicode(text, "ISO-8859-1") +