common language interface and wordreference backend

Conflicts:

	modules/wordreference/pages.py
	weboob/applications/translaboob/translaboob.py
This commit is contained in:
lucien 2012-04-03 09:41:38 +02:00 committed by Romain Bignon
commit 657e2213ac
11 changed files with 287 additions and 19 deletions

View file

@ -19,7 +19,7 @@
"backend for http://translate.google.com"
from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail
from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail, LanguageNotSupported
from weboob.tools.backend import BaseBackend
from .browser import GoogleTranslateBrowser
@ -36,12 +36,30 @@ class GoogleTranslateBackend(BaseBackend, ICapTranslate):
NAME = 'googletranslate'
DESCRIPTION = u'Google translation web service'
BROWSER = GoogleTranslateBrowser
# Maps the English language names accepted by translate() to the language
# codes that are handed to the browser.
GOOGLELANGUAGE = {
    'Arabic': 'ar', 'Afrikaans': 'af', 'Albanian': 'sq', 'Armenian': 'hy',
    'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be',
    'Bengali': 'bn', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Chinese': 'zh-CN',
    'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da',
    'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et',
    'Filipino': 'tl', 'Finnish': 'fi', 'French': 'fr',
    # Greek is 'el' (ISO 639-1); 'gr' is a country code, not a language code.
    'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el',
    'Gujarati': 'gu', 'Haitian': 'ht', 'Hebrew': 'iw',
    # 'Hungaric' is kept as a legacy alias so existing callers keep working.
    'Hindi': 'hi', 'Hungarian': 'hu', 'Hungaric': 'hu', 'Icelandic': 'is',
    'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja',
    'Kannada': 'kn', 'Korean': 'ko', 'Latin': 'la', 'Latvian': 'lv',
    'Lithuanian': 'lt', 'Macedonian': 'mk', 'Malay': 'ms',
    'Maltese': 'mt', 'Norwegian': 'no', 'Persian': 'fa', 'Polish': 'pl',
    'Portuguese': 'pt', 'Romanian': 'ro', 'Russian': 'ru',
    'Serbian': 'sr', 'Slovak': 'sk', 'Slovenian': 'sl', 'Spanish': 'es',
    'Swahili': 'sw', 'Swedish': 'sv', 'Tamil': 'ta',
    'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk',
    'Urdu': 'ur', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Yiddish': 'yi',
}
def translate(self, lan_from, lan_to, text):
if not lan_from in self.GOOGLELANGUAGE.keys():
raise LanguageNotSupported()
if not lan_to in self.GOOGLELANGUAGE.keys():
raise LanguageNotSupported()
translation = Translation(0)
translation.lang_src = unicode(lan_from)
translation.lang_dst = unicode(lan_to)
translation.text = self.browser.translate(lan_from, lan_to, text)
translation.lang_src = unicode(self.GOOGLELANGUAGE[lan_from])
translation.lang_dst = unicode(self.GOOGLELANGUAGE[lan_to])
translation.text = self.browser.translate(self.GOOGLELANGUAGE[lan_from], self.GOOGLELANGUAGE[lan_to], text)
if translation.text is None:
raise TranslationFail()

View file

@ -35,7 +35,7 @@ class GoogleTranslateBrowser(BaseBrowser):
PAGES = {
'https?://translate\.google\.com': TranslatePage
}
def __init__(self, *args, **kwargs):
BaseBrowser.__init__(self, *args, **kwargs)

View file

@ -30,4 +30,4 @@ class TranslatePage(BasePage):
if len(boxes) == 0:
return None
return u'\n'.join([unicode(box.text) for box in boxes])
return u''.join([unicode(box.text) for box in boxes])

View file

@ -28,5 +28,5 @@ class GoogleTranslateTest(BackendTest):
BACKEND = 'googletranslate'
def test_translate(self):
tr = self.backend.translate('fr', 'en', 'je mange du chocolat')
tr = self.backend.translate('French', 'English', 'je mange du chocolat')
self.assertTrue(tr.text == u'I eat chocolate')

View file

@ -0,0 +1,24 @@
"WordReferenceBackend init"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import WordReferenceBackend
__all__ = ['WordReferenceBackend']

View file

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
"backend for http://www.wordreference.com"
from weboob.capabilities.translate import ICapTranslate, Translation, TranslationFail, LanguageNotSupported
from weboob.tools.backend import BaseBackend
from .browser import WordReferenceBrowser
__all__ = ['WordReferenceBackend']
class WordReferenceBackend(BaseBackend, ICapTranslate):
    """Translation backend for http://www.wordreference.com."""

    MAINTAINER = 'Lucien Loiseau'
    EMAIL = 'loiseau.lucien@gmail.com'
    VERSION = '0.c'
    LICENSE = 'AGPLv3+'
    NAME = 'wordreference'
    DESCRIPTION = u'Free online translator'
    BROWSER = WordReferenceBrowser

    # Maps the English language names accepted by translate() to the codes
    # handed to the browser.
    # NOTE(review): 'cz' and 'gr' are not ISO 639-1 codes (those would be
    # 'cs' and 'el'); presumably they are WordReference's own URL codes --
    # confirm before "correcting" them.
    WRLANGUAGE = {
        'Arabic': 'ar', 'Chinese': 'zh', 'Czech': 'cz', 'English': 'en',
        'French': 'fr', 'Greek': 'gr', 'Italian': 'it', 'Japanese': 'ja',
        'Korean': 'ko', 'Polish': 'pl', 'Portuguese': 'pt',
        'Romanian': 'ro', 'Spanish': 'es', 'Turkish': 'tr',
    }

    def translate(self, lan_from, lan_to, text):
        """
        Translate 'text' from language 'lan_from' to language 'lan_to'.

        Both languages are given by name and must be keys of WRLANGUAGE.

        Returns a Translation whose lang_src and lang_dst hold the language
        codes as unicode and whose text holds the browser's result.

        Raises LanguageNotSupported if either language name is unknown, and
        TranslationFail if the browser returns None.
        """
        # Membership is tested on the dict itself: no temporary key list.
        if lan_from not in self.WRLANGUAGE or lan_to not in self.WRLANGUAGE:
            raise LanguageNotSupported()

        code_from = self.WRLANGUAGE[lan_from]
        code_to = self.WRLANGUAGE[lan_to]

        translation = Translation(0)
        translation.lang_src = unicode(code_from)
        translation.lang_dst = unicode(code_to)
        translation.text = self.browser.translate(code_from, code_to, text)

        if translation.text is None:
            raise TranslationFail()

        return translation

View file

@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.tools.browser import BaseBrowser
from .pages import TranslatePage
__all__ = ['WordReferenceBrowser']
class WordReferenceBrowser(BaseBrowser):
    """Browser that looks words up on www.wordreference.com."""

    DOMAIN = 'www.wordreference.com'
    ENCODING = 'UTF-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['desktop_firefox']
    # Raw string so the regex escapes ('\.') are not read as string escapes.
    PAGES = {
        r'https?://www\.wordreference\.com/.*/.*': TranslatePage
    }

    def __init__(self, *args, **kwargs):
        BaseBrowser.__init__(self, *args, **kwargs)

    def translate(self, source, to, text):
        """
        translate 'text' from 'source' language to 'to' language

        'source' and 'to' are language codes; they are concatenated to form
        the first path component of the requested URL, and the URL-quoted
        'text' forms the second.

        Returns whatever get_translation() returns on the loaded page.
        """
        sl = source.encode('utf-8')
        tl = to.encode('utf-8')
        # Encode before quoting: urllib.quote works on byte strings.
        word = urllib.quote(text.encode('utf-8'))

        self.location('http://%s/%s%s/%s' % (self.DOMAIN, sl, tl, word))

        # NOTE(review): self.page is presumably set by location() to the
        # TranslatePage matched through PAGES -- confirm in BaseBrowser.
        return self.page.get_translation()

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage
__all__ = ['TranslatePage']
class TranslatePage(BasePage):
    """Page returned by a WordReference lookup."""

    def get_translation(self):
        """
        Return the text of the first nested element of the first usable
        result row, as unicode.

        A result row is a <tr> whose class attribute is exactly 'odd' or
        'even'. Rows that have no nested element, or whose nested element
        has no text, are skipped instead of raising.

        Returns None when no usable row is found.
        """
        for row in self.document.getiterator('tr'):
            if row.attrib.get('class', '') not in ('odd', 'even'):
                continue

            # Guard the row[0][0] access below: a row without a first child,
            # or a first child without children, cannot hold a translation.
            if len(row) == 0 or len(row[0]) == 0:
                continue

            text = row[0][0].text
            if text is None:
                continue

            return u'' + text

        return None

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Lucien Loiseau
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
__all__ = ['WordReferenceTest']
class WordReferenceTest(BackendTest):
    """Test of the 'wordreference' backend."""

    BACKEND = 'wordreference'

    def test_translate(self):
        """Translating French 'chat' to English must give 'cat'."""
        tr = self.backend.translate('French', 'English', 'chat')
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(tr.text, u'cat')