common language interface and wordreference backend

Conflicts:

	modules/wordreference/pages.py
	weboob/applications/translaboob/translaboob.py
This commit is contained in:
lucien 2012-04-03 09:41:38 +02:00 committed by Romain Bignon
commit 657e2213ac
11 changed files with 287 additions and 19 deletions

View file

@ -19,7 +19,7 @@
"backend for http://translate.google.com"
from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail
from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail, LanguageNotSupported
from weboob.tools.backend import BaseBackend
from .browser import GoogleTranslateBrowser
@ -36,12 +36,30 @@ class GoogleTranslateBackend(BaseBackend, ICapTranslate):
NAME = 'googletranslate'
DESCRIPTION = u'Google translation web service'
BROWSER = GoogleTranslateBrowser
GOOGLELANGUAGE = {
'Arabic':'ar', 'Afrikaans':'af', 'Albanian':'sq', 'Armenian':'hy', 'Azerbaijani':'az', 'Basque':'eu', 'Belarusian':'be',
'Bengali':'bn', 'Bulgarian':'bg', 'Catalan':'ca', 'Chinese':'zh-CN', 'Croatian':'hr', 'Czech':'cs', 'Danish':'da',
'Dutch':'nl', 'English':'en', 'Esperanto':'eo', 'Estonian':'et', 'Filipino':'tl', 'Finnish':'fi', 'French':'fr',
'Galician':'gl', 'Georgian':'ka', 'German':'de', 'Greek':'gr', 'Gujarati':'gu', 'Haitian':'ht', 'Hebrew':'iw',
'Hindi':'hi', 'Hungaric':'hu', 'Icelandic':'is', 'Indonesian':'id', 'Irish':'ga', 'Italian':'it', 'Japanese':'ja',
'Kannada':'kn', 'Korean':'ko', 'Latin':'la', 'Latvian':'lv', 'Lithuanian':'lt', 'Macedonian':'mk', 'Malay':'ms',
'Maltese':'mt', 'Norwegian':'no', 'Persian':'fa', 'Polish':'pl', 'Portuguese':'pt', 'Romanian':'ro', 'Russian':'ru',
'Serbian':'sr', 'Slovak':'sk', 'Slovenian':'sl', 'Spanish':'es', 'Swahili':'sw', 'Swedish':'sv', 'Tamil':'ta',
'Telugu':'te', 'Thai':'th', 'Turkish':'tr', 'Ukrainian':'uk', 'Urdu':'ur', 'Vietnamese':'vi', 'Welsh':'cy', 'Yiddish':'yi',
}
def translate(self, lan_from, lan_to, text):
if not lan_from in self.GOOGLELANGUAGE.keys():
raise LanguageNotSupported()
if not lan_to in self.GOOGLELANGUAGE.keys():
raise LanguageNotSupported()
translation = Translation(0)
translation.lang_src = unicode(lan_from)
translation.lang_dst = unicode(lan_to)
translation.text = self.browser.translate(lan_from, lan_to, text)
translation.lang_src = unicode(self.GOOGLELANGUAGE[lan_from])
translation.lang_dst = unicode(self.GOOGLELANGUAGE[lan_to])
translation.text = self.browser.translate(self.GOOGLELANGUAGE[lan_from], self.GOOGLELANGUAGE[lan_to], text)
if translation.text is None:
raise TranslationFail()

View file

@ -35,7 +35,7 @@ class GoogleTranslateBrowser(BaseBrowser):
PAGES = {
'https?://translate\.google\.com': TranslatePage
}
def __init__(self, *args, **kwargs):
BaseBrowser.__init__(self, *args, **kwargs)

View file

@ -30,4 +30,4 @@ class TranslatePage(BasePage):
if len(boxes) == 0:
return None
return u'\n'.join([unicode(box.text) for box in boxes])
return u''.join([unicode(box.text) for box in boxes])

View file

@ -28,5 +28,5 @@ class GoogleTranslateTest(BackendTest):
BACKEND = 'googletranslate'
def test_translate(self):
tr = self.backend.translate('fr', 'en', 'je mange du chocolat')
tr = self.backend.translate('French', 'English', 'je mange du chocolat')
self.assertTrue(tr.text == u'I eat chocolate')

View file

@ -0,0 +1,24 @@
"WordReferenceBackend init"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import WordReferenceBackend
__all__ = ['WordReferenceBackend']

View file

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
"backend for http://www.wordreference.com"
from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail, LanguageNotSupported
from weboob.tools.backend import BaseBackend
from .browser import WordReferenceBrowser
__all__ = ['WordReferenceBackend']
class WordReferenceBackend(BaseBackend, ICapTranslate):
    """
    Translation backend for http://www.wordreference.com.

    Languages are given by their English name (for example 'French') and
    are converted to a language code through WRLANGUAGE before being handed
    to the browser.
    """
    MAINTAINER = 'Lucien Loiseau'
    EMAIL = 'loiseau.lucien@gmail.com'
    VERSION = '0.c'
    LICENSE = 'AGPLv3+'
    NAME = 'wordreference'
    DESCRIPTION = u'Free online translator'
    BROWSER = WordReferenceBrowser

    # English language name -> language code passed to the browser.
    WRLANGUAGE = {
        'Arabic':'ar', 'Chinese':'zh', 'Czech':'cz', 'English':'en', 'French':'fr', 'Greek':'gr',
        'Italian':'it', 'Japanese':'ja', 'Korean':'ko', 'Polish':'pl', 'Portuguese':'pt',
        'Romanian':'ro', 'Spanish':'es', 'Turkish':'tr',
    }

    def translate(self, lan_from, lan_to, text):
        """
        Translate 'text' from the language 'lan_from' to the language 'lan_to'.

        :param lan_from: name of the source language, a key of WRLANGUAGE
        :param lan_to: name of the destination language, a key of WRLANGUAGE
        :param text: text to translate
        :return: a Translation whose lang_src and lang_dst hold the language
                 codes and whose text holds the browser's result
        :raises LanguageNotSupported: if either language is not in WRLANGUAGE
        :raises TranslationFail: if the browser returns None
        """
        # Membership is tested on the dict itself: no intermediate key list.
        if lan_from not in self.WRLANGUAGE:
            raise LanguageNotSupported()
        if lan_to not in self.WRLANGUAGE:
            raise LanguageNotSupported()

        code_from = self.WRLANGUAGE[lan_from]
        code_to = self.WRLANGUAGE[lan_to]

        translation = Translation(0)
        translation.lang_src = unicode(code_from)
        translation.lang_dst = unicode(code_to)
        translation.text = self.browser.translate(code_from, code_to, text)

        if translation.text is None:
            raise TranslationFail()

        return translation

View file

@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.tools.browser import BaseBrowser
from .pages import TranslatePage
__all__ = ['WordReferenceBrowser']
class WordReferenceBrowser(BaseBrowser):
    """
    Browser querying www.wordreference.com for translations.
    """
    DOMAIN = 'www.wordreference.com'
    ENCODING = 'UTF-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['desktop_firefox']
    # Raw string: '\.' is a regex escape, not a string escape.
    PAGES = {
        r'https?://www\.wordreference\.com/.*/.*': TranslatePage
    }

    def __init__(self, *args, **kwargs):
        BaseBrowser.__init__(self, *args, **kwargs)

    def translate(self, source, to, text):
        """
        translate 'text' from 'source' language to 'to' language

        :param source: code of the source language
        :param to: code of the destination language
        :param text: text to translate
        :return: whatever the loaded page's get_translation() returns
        """
        sl = source.encode('utf-8')
        tl = to.encode('utf-8')
        text = text.encode('utf-8')

        # The URL path is the two language codes glued together, followed
        # by the percent-quoted text.
        url = 'http://%s/%s%s/%s' % (self.DOMAIN, sl, tl, urllib.quote(text))
        self.location(url)

        return self.page.get_translation()

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage
__all__ = ['TranslatePage']
class TranslatePage(BasePage):
    """
    Page from which a translation is extracted.
    """

    def get_translation(self):
        """
        Return the text found in the first matching table row.

        Rows are the 'tr' elements of the document whose 'class' attribute
        is 'odd' or 'even'. The value returned is the text of the first
        child of the row's first child, as a unicode string. None is
        returned when no row matches.
        """
        for row in self.document.getiterator('tr'):
            if row.attrib.get('class', '') not in ('odd', 'even'):
                continue
            first_child = row.getchildren()[0]
            return u'' + first_child.getchildren()[0].text
        return None

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
__all__ = ['WordReferenceTest']
class WordReferenceTest(BackendTest):
    """
    Test of the 'wordreference' backend.
    """
    BACKEND = 'wordreference'

    def test_translate(self):
        """
        Translating 'chat' from French to English must give 'cat'.
        """
        tr = self.backend.translate('French', 'English', 'chat')
        # assertEqual reports both values on failure, which
        # assertTrue(a == b) does not.
        self.assertEqual(tr.text, u'cat')

View file

@ -18,7 +18,8 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.translate import ICapTranslate
import sys
from weboob.capabilities.translate import ICapTranslate, TranslationFail, LanguageNotSupported
from weboob.tools.application.repl import ReplApplication
from weboob.tools.application.formatters.iformatter import IFormatter
@ -49,10 +50,22 @@ class Translaboob(ReplApplication):
DESCRIPTION = 'Console application to translate text from one language to another'
CAPS = ICapTranslate
EXTRA_FORMATTERS = {'translation': TranslationFormatter,
'xmltrans': XmlTranslationFormatter,
}
'xmltrans': XmlTranslationFormatter,
}
COMMANDS_FORMATTERS = {'translate': 'translation',
}
}
LANGUAGE = {
'ar':'Arabic', 'af':'Afrikaans', 'sq':'Albanian', 'hy':'Armenian', 'az':'Azerbaijani', 'eu':'Basque', 'be':'Belarusian',
'bn':'Bengali', 'bg':'Bulgarian', 'ca':'Catalan', 'zh':'Chinese', 'hr':'Croatian', 'cz':'Czech', 'da':'Danish',
'nl':'Dutch', 'en':'English', 'eo':'Esperanto', 'et':'Estonian', 'tl':'Filipino', 'fi':'Finnish', 'fr':'French',
'gl':'Galician', 'ka':'Georgian', 'de':'German', 'gr':'Greek', 'gu':'Gujarati', 'ht':'Haitian', 'iw':'Hebrew',
'hi':'Hindi', 'hu':'Hungaric', 'is':'Icelandic', 'id':'Indonesian', 'ga':'Irish', 'it':'Italian', 'ja':'Japanese',
'kn':'Kannada', 'ko':'Korean', 'la':'Latin', 'lv':'Latvian', 'lt':'Lithuanian', 'mk':'Macedonian', 'ms':'Malay',
'mt':'Maltese', 'no':'Norwegian', 'fa':'Persian', 'pl':'Polish', 'pt':'Portuguese', 'ro':'Romanian', 'ru':'Russian',
'sr':'Serbian', 'sk':'Slovak', 'sl':'Slovenian', 'es':'Spanish', 'sw':'Swahili', 'sv':'Swedish', 'ta':'Tamil',
'te':'Telugu', 'th':'Thai', 'tr':'Turkish', 'uk':'Ukrainian', 'ur':'Urdu', 'vi':'Vietnamese', 'cy':'Welsh', 'yi':'Yiddish',
}
def do_translate(self, line):
"""
@ -62,14 +75,43 @@ class Translaboob(ReplApplication):
* FROM : source language
* TO : destination language
* TEXT : language to translate, standart input if - is given
Language Abbreviation
----------------------
Arabic ar Esperanto eo Irish ga Russian ru
Afrikaans af Estonian et Italian it Serbian sr
Albanian sq Filipino tl Japanese ja Slovak sk
Armenian hy Finnish fi Kannada kn Slovenian sl
Azerbaijani az French fr Korean ko Spanish es
Basque eu Galician gl Latin la Swahili sw
Belarusian be Georgian ka Latvian lv Swedish sv
Bengali bn German de Lithuanian lt Tamil ta
Bulgarian bg Greek gr Macedonian mk Telugu te
Catalan ca Gujarati gu Malay ms Thai th
Chinese zh Haitian ht Maltese mt Turkish tr
Croatian hr Hebrew iw Norwegian no Ukrainian uk
Czech cz Hindi hi Persian fa Urdu ur
Danish da Hungaric hu Polish pl Vietnamese vi
Dutch nl Icelandic is Portuguese pt Welsh cy
English en Indonesian id Romanian ro Yiddish yi
----------------------
"""
lan_from, lan_to, text = self.parse_command_args(line, 3, 2)
if not text or text == '-':
text = self.acquire_input()
try:
if not lan_from in self.LANGUAGE.keys():
raise LanguageNotSupported()
if not lan_to in self.LANGUAGE.keys():
raise LanguageNotSupported()
self.start_format(source=text)
for backend, translation in self.do('translate', lan_from, lan_to, text):
self.format(translation)
self.flush()
if not text or text == '-':
text = self.acquire_input()
self.start_format(source=text)
for backend, translation in self.do('translate', self.LANGUAGE[lan_from], self.LANGUAGE[lan_to], text):
self.format(translation)
self.flush()
except (TranslationFail, LanguageNotSupported) as error:
print >>sys.stderr, error
pass

View file

@ -21,7 +21,16 @@
from .base import IBaseCap, CapBaseObject, StringField
__all__ = ['TranslationFail', 'ICapTranslate']
__all__ = ['TranslationFail', 'LanguageNotSupported', 'ICapTranslate']
class LanguageNotSupported(Exception):
    """
    Error signalling that a requested language is not supported.

    The message defaults to 'language is not supported' when none is given.
    """

    def __init__(self, msg='language is not supported'):
        super(LanguageNotSupported, self).__init__(msg)
class TranslationFail(Exception):