diff --git a/modules/wordreference/browser.py b/modules/wordreference/browser.py index f039971b..06afed72 100644 --- a/modules/wordreference/browser.py +++ b/modules/wordreference/browser.py @@ -18,34 +18,22 @@ # along with weboob. If not, see <http://www.gnu.org/licenses/>. -import urllib - -from weboob.deprecated.browser import Browser - +from weboob.browser import PagesBrowser, URL from .pages import TranslatePage __all__ = ['WordReferenceBrowser'] -class WordReferenceBrowser(Browser): - DOMAIN = 'www.wordreference.com' - ENCODING = 'UTF-8' - USER_AGENT = Browser.USER_AGENTS['desktop_firefox'] - PAGES = { - 'https?://www\.wordreference\.com/.*/.*': TranslatePage - } - - def __init__(self, *args, **kwargs): - Browser.__init__(self, *args, **kwargs) +class WordReferenceBrowser(PagesBrowser): + BASEURL = 'http://www.wordreference.com' + translation_page = URL('(?P<sl>[a-z]{2})(?P<tl>[a-z]{2})/(?P<pattern>.*)', TranslatePage) def translate(self, source, to, text): """ translate 'text' from 'source' language to 'to' language """ - sl = source.encode('utf-8') - tl = to.encode('utf-8') - text = text.encode('utf-8') - self.location('http://'+self.DOMAIN+'/'+sl+tl+'/'+urllib.quote(text)) - translation = self.page.get_translation() - return translation + + return self.translation_page.go(sl=source.encode('utf-8'), + tl=to.encode('utf-8'), + pattern=text.encode('utf-8')).get_translation() diff --git a/modules/wordreference/module.py b/modules/wordreference/module.py index ca3c63b4..7b5a7ec7 100644 --- a/modules/wordreference/module.py +++ b/modules/wordreference/module.py @@ -19,7 +19,7 @@ "backend for http://www.wordreference.com" -from weboob.capabilities.translate import CapTranslate, Translation, TranslationFail, LanguageNotSupported +from weboob.capabilities.translate import CapTranslate, TranslationFail, LanguageNotSupported from weboob.tools.backend import Module from .browser import WordReferenceBrowser @@ -37,9 +37,9 @@ class WordReferenceModule(Module, CapTranslate): DESCRIPTION = u'Free online translator' 
BROWSER = WordReferenceBrowser WRLANGUAGE = { - 'Arabic':'ar', 'Chinese':'zh', 'Czech':'cz', 'English':'en', 'French':'fr', 'Greek':'gr', - 'Italian':'it', 'Japanese':'ja', 'Korean':'ko', 'Polish':'pl', 'Portuguese':'pt', - 'Romanian':'ro', 'Spanish':'es', 'Turkish':'tr', + 'Arabic': 'ar', 'Chinese': 'zh', 'Czech': 'cz', 'English': 'en', 'French': 'fr', 'Greek': 'gr', + 'Italian': 'it', 'Japanese': 'ja', 'Korean': 'ko', 'Polish': 'pl', 'Portuguese': 'pt', + 'Romanian': 'ro', 'Spanish': 'es', 'Turkish': 'tr', } def translate(self, lan_from, lan_to, text): @@ -49,12 +49,12 @@ class WordReferenceModule(Module, CapTranslate): if lan_to not in self.WRLANGUAGE.keys(): raise LanguageNotSupported() - translation = Translation(0) - translation.lang_src = unicode(self.WRLANGUAGE[lan_from]) - translation.lang_dst = unicode(self.WRLANGUAGE[lan_to]) - translation.text = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text) + translations = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text) + has_translation = False - if translation.text is None: + for translation in translations: + has_translation = True + yield translation + + if not has_translation: raise TranslationFail() - - return translation diff --git a/modules/wordreference/pages.py b/modules/wordreference/pages.py index 252f8861..f028d49a 100644 --- a/modules/wordreference/pages.py +++ b/modules/wordreference/pages.py @@ -17,48 +17,22 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. 
- -from weboob.deprecated.browser import Page -import re +from weboob.browser.pages import HTMLPage +from weboob.browser.elements import ItemElement, ListElement, method +from weboob.capabilities.translate import Translation +from weboob.browser.filters.standard import CleanText, Regexp, Env +from weboob.browser.filters.html import CleanHTML -LAST_THING_IN_PARENTHESIS = re.compile("\([^)]\)$") +class TranslatePage(HTMLPage): + @method + class get_translation(ListElement): + item_xpath = '//table[@class="WRD" and not(@id)]/tr[@id]' + class item(ItemElement): + klass = Translation -class TranslatePage(Page): - def get_translation(self): - trs = self.document.getroot().xpath("//table[@class='WRD']/tr[@class='even']") - if trs and len(trs) > 0: - # taking the first signification in the case several were found - return self.parser.select(trs[0], "td[@class='ToWrd']", 1, method='xpath').text - """ - # taking the first signification in the case several were found - for tr in self.document.getiterator('tr'): - prev_was_nums1 = False - for td in tr.getiterator('td'): - if prev_was_nums1: - result = u''+td.text_content().split(';')[0].strip() - result = LAST_THING_IN_PARENTHESIS.sub("",result) - return result - if td.attrib.get('class','') == 'nums1': - prev_was_nums1 = True - # if only one signification is found - for div in self.document.getiterator('div'): - if div.attrib.get('class','') == "trans clickable": - if ']' in div.text_content(): - tnames = div.text_content().split(']')[1].split()[1:] - else: - tnames = div.text_content().split()[1:] - names = u''+" ".join(tnames).split(';')[0] - names = LAST_THING_IN_PARENTHESIS.sub("",names) - return names.strip() - # another numerotation possibility... 
- for table in self.document.getiterator('table'): - if table.attrib.get('class','') == "trans clickable": - prev_was_roman1 = False - for td in table.getiterator('td'): - if prev_was_roman1: - return u''+td.text_content().split(';')[0].strip() - if td.attrib.get('class','') == 'roman1': - prev_was_roman1 = True - """ + obj_id = Regexp(CleanText('./@id'), '.*:(.*)') + obj_lang_src = Env('sl') + obj_lang_dst = Env('tl') + obj_text = CleanHTML('./td[@class="ToWrd"]')