diff --git a/modules/googletranslate/backend.py b/modules/googletranslate/backend.py
index eea5ebf1..f7da5f9e 100644
--- a/modules/googletranslate/backend.py
+++ b/modules/googletranslate/backend.py
@@ -19,7 +19,7 @@
 "backend for http://translate.google.com"
 
 
-from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail
+from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail, LanguageNotSupported
 from weboob.tools.backend import BaseBackend
 
 from .browser import GoogleTranslateBrowser
@@ -36,12 +36,37 @@ class GoogleTranslateBackend(BaseBackend, ICapTranslate):
     NAME = 'googletranslate'
     DESCRIPTION = u'Google translation web service'
     BROWSER = GoogleTranslateBrowser
+
+    # English language name -> language code sent to Google Translate.
+    GOOGLELANGUAGE = {
+        'Arabic': 'ar', 'Afrikaans': 'af', 'Albanian': 'sq', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be',
+        'Bengali': 'bn', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Chinese': 'zh-CN', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da',
+        'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Filipino': 'tl', 'Finnish': 'fi', 'French': 'fr',
+        'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian': 'ht', 'Hebrew': 'iw',
+        'Hindi': 'hi', 'Hungarian': 'hu', 'Icelandic': 'is', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja',
+        'Kannada': 'kn', 'Korean': 'ko', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Macedonian': 'mk', 'Malay': 'ms',
+        'Maltese': 'mt', 'Norwegian': 'no', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese': 'pt', 'Romanian': 'ro', 'Russian': 'ru',
+        'Serbian': 'sr', 'Slovak': 'sk', 'Slovenian': 'sl', 'Spanish': 'es', 'Swahili': 'sw', 'Swedish': 'sv', 'Tamil': 'ta',
+        'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Yiddish': 'yi',
+    }
 
     def translate(self, lan_from, lan_to, text):
+        """
+        Translate ``text`` using Google Translate.
+
+        ``lan_from`` and ``lan_to`` are English language names, i.e. keys of
+        GOOGLELANGUAGE. Raises LanguageNotSupported when either is unknown.
+        """
+        if lan_from not in self.GOOGLELANGUAGE or lan_to not in self.GOOGLELANGUAGE:
+            raise LanguageNotSupported()
+
+        code_from = self.GOOGLELANGUAGE[lan_from]
+        code_to = self.GOOGLELANGUAGE[lan_to]
+
         translation = Translation(0)
-        translation.lang_src = unicode(lan_from)
-        translation.lang_dst = unicode(lan_to)
-        translation.text = self.browser.translate(lan_from, lan_to, text)
+        translation.lang_src = unicode(code_from)
+        translation.lang_dst = unicode(code_to)
+        translation.text = self.browser.translate(code_from, code_to, text)
 
         if translation.text is None:
             raise TranslationFail()
diff --git a/modules/googletranslate/pages.py b/modules/googletranslate/pages.py
index ec44b4a1..d2d0c94e 100644
--- a/modules/googletranslate/pages.py
+++ b/modules/googletranslate/pages.py
@@ -30,4 +30,4 @@ class TranslatePage(BasePage):
         if len(boxes) == 0:
             return None
 
-        return u'\n'.join([unicode(box.text) for box in boxes])
+        return u''.join([unicode(box.text) for box in boxes])
diff --git a/modules/googletranslate/test.py b/modules/googletranslate/test.py
index 936bfbe3..f6c01200 100644
--- a/modules/googletranslate/test.py
+++ b/modules/googletranslate/test.py
@@ -28,5 +28,5 @@ class GoogleTranslateTest(BackendTest):
     BACKEND = 'googletranslate'
 
     def test_translate(self):
-        tr = self.backend.translate('fr', 'en', 'je mange du chocolat')
+        tr = self.backend.translate('French', 'English', 'je mange du chocolat')
         self.assertTrue(tr.text == u'I eat chocolate')
diff --git a/modules/wordreference/__init__.py b/modules/wordreference/__init__.py
new file mode 100644
index 00000000..51bbb3cb
--- /dev/null
+++ b/modules/wordreference/__init__.py
@@ -0,0 +1,24 @@
+"WordReferenceBackend init"
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .backend import WordReferenceBackend
+
+
+__all__ = ['WordReferenceBackend']
diff --git a/modules/wordreference/backend.py b/modules/wordreference/backend.py
new file mode 100644
index 00000000..52f0c665
--- /dev/null
+++ b/modules/wordreference/backend.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+"backend for http://www.wordreference.com"
+
+
+from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail, LanguageNotSupported
+from weboob.tools.backend import BaseBackend
+
+from .browser import WordReferenceBrowser
+
+
+__all__ = ['WordReferenceBackend']
+
+
+class WordReferenceBackend(BaseBackend, ICapTranslate):
+    MAINTAINER = 'Lucien Loiseau'
+    EMAIL = 'loiseau.lucien@gmail.com'
+    VERSION = '0.c'
+    LICENSE = 'AGPLv3+'
+    NAME = 'wordreference'
+    DESCRIPTION = u'Free online translator'
+    BROWSER = WordReferenceBrowser
+
+    # English language name -> language code passed to the browser.
+    WRLANGUAGE = {
+        'Arabic': 'ar', 'Chinese': 'zh', 'Czech': 'cz', 'English': 'en', 'French': 'fr', 'Greek': 'gr',
+        'Italian': 'it', 'Japanese': 'ja', 'Korean': 'ko', 'Polish': 'pl', 'Portuguese': 'pt',
+        'Romanian': 'ro', 'Spanish': 'es', 'Turkish': 'tr',
+    }
+
+    def translate(self, lan_from, lan_to, text):
+        """
+        Translate ``text`` using wordreference.com.
+
+        ``lan_from`` and ``lan_to`` are English language names, i.e. keys of
+        WRLANGUAGE. Raises LanguageNotSupported when either is unknown and
+        TranslationFail when the browser returns no translation.
+        """
+        if lan_from not in self.WRLANGUAGE or lan_to not in self.WRLANGUAGE:
+            raise LanguageNotSupported()
+
+        code_from = self.WRLANGUAGE[lan_from]
+        code_to = self.WRLANGUAGE[lan_to]
+
+        translation = Translation(0)
+        translation.lang_src = unicode(code_from)
+        translation.lang_dst = unicode(code_to)
+        translation.text = self.browser.translate(code_from, code_to, text)
+
+        if translation.text is None:
+            raise TranslationFail()
+
+        return translation
diff --git a/modules/wordreference/browser.py b/modules/wordreference/browser.py
new file mode 100644
index 00000000..41c5a7cd
--- /dev/null
+++ b/modules/wordreference/browser.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import urllib
+
+from weboob.tools.browser import BaseBrowser
+
+from .pages import TranslatePage
+
+
+__all__ = ['WordReferenceBrowser']
+
+
+class WordReferenceBrowser(BaseBrowser):
+    DOMAIN = 'www.wordreference.com'
+    ENCODING = 'UTF-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['desktop_firefox']
+    PAGES = {
+        r'https?://www\.wordreference\.com/.*/.*': TranslatePage
+    }
+
+    def __init__(self, *args, **kwargs):
+        BaseBrowser.__init__(self, *args, **kwargs)
+
+    def translate(self, source, to, text):
+        """
+        Translate 'text' from 'source' language to 'to' language.
+
+        'source' and 'to' are language codes; they are concatenated to form
+        the first path component of the URL (e.g. 'fr' and 'en' give '/fren/').
+        Returns whatever the loaded page's get_translation() returns.
+        """
+        sl = source.encode('utf-8')
+        tl = to.encode('utf-8')
+        # Only encode unicode objects: calling encode() on a non-ASCII byte
+        # string would first decode it as ASCII and fail.
+        if isinstance(text, unicode):
+            text = text.encode('utf-8')
+        self.location('http://%s/%s%s/%s' % (self.DOMAIN, sl, tl, urllib.quote(text)))
+        return self.page.get_translation()
diff --git a/modules/wordreference/pages.py b/modules/wordreference/pages.py
new file mode 100644
index 00000000..45ba6402
--- /dev/null
+++ b/modules/wordreference/pages.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BasePage
+
+
+__all__ = ['TranslatePage']
+
+
+class TranslatePage(BasePage):
+    def get_translation(self):
+        """
+        Return, as unicode, the text of the first child of the first child of
+        the first table row whose class is 'odd' or 'even'.
+
+        Return None when there is no such row, or when that row lacks the
+        expected children or text, instead of failing on the lookup.
+        """
+        for tr in self.document.getiterator('tr'):
+            if tr.attrib.get('class', '') not in ('odd', 'even'):
+                continue
+
+            cells = tr.getchildren()
+            inner = cells[0].getchildren() if cells else []
+            if not inner or inner[0].text is None:
+                return None
+            return unicode(inner[0].text)
+
+        return None
diff --git a/modules/wordreference/test.py b/modules/wordreference/test.py
new file mode 100644
index 00000000..5131cf16
--- /dev/null
+++ b/modules/wordreference/test.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+
+
+__all__ = ['WordReferenceTest']
+
+
+class WordReferenceTest(BackendTest):
+    BACKEND = 'wordreference'
+
+    def test_translate(self):
+        tr = self.backend.translate('French', 'English', 'chat')
+        self.assertEqual(tr.text, u'cat')
diff --git a/weboob/applications/translaboob/translaboob.py b/weboob/applications/translaboob/translaboob.py
index c2501c28..07400f4d 100644
--- a/weboob/applications/translaboob/translaboob.py
+++ b/weboob/applications/translaboob/translaboob.py
@@ -18,7 +18,9 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
 
-from weboob.capabilities.translate import ICapTranslate
+import sys
+
+from weboob.capabilities.translate import ICapTranslate, TranslationFail, LanguageNotSupported
 from weboob.tools.application.repl import ReplApplication
 from weboob.tools.application.formatters.iformatter import IFormatter
 
@@ -53,6 +55,19 @@ class Translaboob(ReplApplication):
                         }
     COMMANDS_FORMATTERS = {'translate': 'translation',
                            }
+    # Language abbreviation accepted on the command line -> English language
+    # name passed to the backends' translate() method.
+    LANGUAGE = {
+        'ar': 'Arabic', 'af': 'Afrikaans', 'sq': 'Albanian', 'hy': 'Armenian', 'az': 'Azerbaijani', 'eu': 'Basque', 'be': 'Belarusian',
+        'bn': 'Bengali', 'bg': 'Bulgarian', 'ca': 'Catalan', 'zh': 'Chinese', 'hr': 'Croatian', 'cz': 'Czech', 'da': 'Danish',
+        'nl': 'Dutch', 'en': 'English', 'eo': 'Esperanto', 'et': 'Estonian', 'tl': 'Filipino', 'fi': 'Finnish', 'fr': 'French',
+        'gl': 'Galician', 'ka': 'Georgian', 'de': 'German', 'gr': 'Greek', 'gu': 'Gujarati', 'ht': 'Haitian', 'iw': 'Hebrew',
+        'hi': 'Hindi', 'hu': 'Hungarian', 'is': 'Icelandic', 'id': 'Indonesian', 'ga': 'Irish', 'it': 'Italian', 'ja': 'Japanese',
+        'kn': 'Kannada', 'ko': 'Korean', 'la': 'Latin', 'lv': 'Latvian', 'lt': 'Lithuanian', 'mk': 'Macedonian', 'ms': 'Malay',
+        'mt': 'Maltese', 'no': 'Norwegian', 'fa': 'Persian', 'pl': 'Polish', 'pt': 'Portuguese', 'ro': 'Romanian', 'ru': 'Russian',
+        'sr': 'Serbian', 'sk': 'Slovak', 'sl': 'Slovenian', 'es': 'Spanish', 'sw': 'Swahili', 'sv': 'Swedish', 'ta': 'Tamil',
+        'te': 'Telugu', 'th': 'Thai', 'tr': 'Turkish', 'uk': 'Ukrainian', 'ur': 'Urdu', 'vi': 'Vietnamese', 'cy': 'Welsh', 'yi': 'Yiddish',
+    }
 
     def do_translate(self, line):
         """
@@ -62,14 +77,42 @@ class Translaboob(ReplApplication):
         * FROM : source language
         * TO : destination language
         * TEXT : language to translate, standart input if - is given
+
+        Language Abbreviation
+        ----------------------
+        Arabic      ar   Esperanto   eo   Irish       ga   Russian     ru
+        Afrikaans   af   Estonian    et   Italian     it   Serbian     sr
+        Albanian    sq   Filipino    tl   Japanese    ja   Slovak      sk
+        Armenian    hy   Finnish     fi   Kannada     kn   Slovenian   sl
+        Azerbaijani az   French      fr   Korean      ko   Spanish     es
+        Basque      eu   Galician    gl   Latin       la   Swahili     sw
+        Belarusian  be   Georgian    ka   Latvian     lv   Swedish     sv
+        Bengali     bn   German      de   Lithuanian  lt   Tamil       ta
+        Bulgarian   bg   Greek       gr   Macedonian  mk   Telugu      te
+        Catalan     ca   Gujarati    gu   Malay       ms   Thai        th
+        Chinese     zh   Haitian     ht   Maltese     mt   Turkish     tr
+        Croatian    hr   Hebrew      iw   Norwegian   no   Ukrainian   uk
+        Czech       cz   Hindi       hi   Persian     fa   Urdu        ur
+        Danish      da   Hungarian   hu   Polish      pl   Vietnamese  vi
+        Dutch       nl   Icelandic   is   Portuguese  pt   Welsh       cy
+        English     en   Indonesian  id   Romanian    ro   Yiddish     yi
+        ----------------------
         """
 
         lan_from, lan_to, text = self.parse_command_args(line, 3, 2)
 
-        if not text or text == '-':
-            text = self.acquire_input()
-
-        self.start_format(source=text)
-        for backend, translation in self.do('translate', lan_from, lan_to, text):
-            self.format(translation)
-        self.flush()
+        try:
+            # Validate both abbreviations before acquiring any input.
+            for lang in (lan_from, lan_to):
+                if lang not in self.LANGUAGE:
+                    raise LanguageNotSupported()
+
+            if not text or text == '-':
+                text = self.acquire_input()
+
+            self.start_format(source=text)
+            for backend, translation in self.do('translate', self.LANGUAGE[lan_from], self.LANGUAGE[lan_to], text):
+                self.format(translation)
+            self.flush()
+        except (TranslationFail, LanguageNotSupported) as error:
+            print >>sys.stderr, error
diff --git a/weboob/capabilities/translate.py b/weboob/capabilities/translate.py
index 27a3494f..e09ed8e2 100644
--- a/weboob/capabilities/translate.py
+++ b/weboob/capabilities/translate.py
@@ -21,7 +21,16 @@
 from .base import IBaseCap, CapBaseObject, StringField
 
 
-__all__ = ['TranslationFail', 'ICapTranslate']
+__all__ = ['TranslationFail', 'LanguageNotSupported', 'ICapTranslate']
+
+
+class LanguageNotSupported(Exception):
+    """
+    Raised when the language is not supported
+    """
+
+    def __init__(self, msg='language is not supported'):
+        Exception.__init__(self, msg)
 
 
 class TranslationFail(Exception):