diff --git a/modules/wordreference/browser.py b/modules/wordreference/browser.py
index f039971b..06afed72 100644
--- a/modules/wordreference/browser.py
+++ b/modules/wordreference/browser.py
@@ -18,34 +18,22 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-import urllib
-
-from weboob.deprecated.browser import Browser
-
+from weboob.browser import PagesBrowser, URL
from .pages import TranslatePage
__all__ = ['WordReferenceBrowser']
-class WordReferenceBrowser(Browser):
- DOMAIN = 'www.wordreference.com'
- ENCODING = 'UTF-8'
- USER_AGENT = Browser.USER_AGENTS['desktop_firefox']
- PAGES = {
- 'https?://www\.wordreference\.com/.*/.*': TranslatePage
- }
-
- def __init__(self, *args, **kwargs):
- Browser.__init__(self, *args, **kwargs)
+class WordReferenceBrowser(PagesBrowser):
+ BASEURL = 'http://www.wordreference.com'
+ translation_page = URL('(?P<sl>[a-z]{2})(?P<tl>[a-z]{2})/(?P<pattern>.*)', TranslatePage)
def translate(self, source, to, text):
"""
translate 'text' from 'source' language to 'to' language
"""
- sl = source.encode('utf-8')
- tl = to.encode('utf-8')
- text = text.encode('utf-8')
- self.location('http://'+self.DOMAIN+'/'+sl+tl+'/'+urllib.quote(text))
- translation = self.page.get_translation()
- return translation
+
+ return self.translation_page.go(sl=source.encode('utf-8'),
+ tl=to.encode('utf-8'),
+ pattern=text.encode('utf-8')).get_translation()
diff --git a/modules/wordreference/module.py b/modules/wordreference/module.py
index ca3c63b4..7b5a7ec7 100644
--- a/modules/wordreference/module.py
+++ b/modules/wordreference/module.py
@@ -19,7 +19,7 @@
"backend for http://www.wordreference.com"
-from weboob.capabilities.translate import CapTranslate, Translation, TranslationFail, LanguageNotSupported
+from weboob.capabilities.translate import CapTranslate, TranslationFail, LanguageNotSupported
from weboob.tools.backend import Module
from .browser import WordReferenceBrowser
@@ -37,9 +37,9 @@ class WordReferenceModule(Module, CapTranslate):
DESCRIPTION = u'Free online translator'
BROWSER = WordReferenceBrowser
WRLANGUAGE = {
- 'Arabic':'ar', 'Chinese':'zh', 'Czech':'cz', 'English':'en', 'French':'fr', 'Greek':'gr',
- 'Italian':'it', 'Japanese':'ja', 'Korean':'ko', 'Polish':'pl', 'Portuguese':'pt',
- 'Romanian':'ro', 'Spanish':'es', 'Turkish':'tr',
+ 'Arabic': 'ar', 'Chinese': 'zh', 'Czech': 'cz', 'English': 'en', 'French': 'fr', 'Greek': 'gr',
+ 'Italian': 'it', 'Japanese': 'ja', 'Korean': 'ko', 'Polish': 'pl', 'Portuguese': 'pt',
+ 'Romanian': 'ro', 'Spanish': 'es', 'Turkish': 'tr',
}
def translate(self, lan_from, lan_to, text):
@@ -49,12 +49,12 @@ class WordReferenceModule(Module, CapTranslate):
if lan_to not in self.WRLANGUAGE.keys():
raise LanguageNotSupported()
- translation = Translation(0)
- translation.lang_src = unicode(self.WRLANGUAGE[lan_from])
- translation.lang_dst = unicode(self.WRLANGUAGE[lan_to])
- translation.text = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
+ translations = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
+ has_translation = False
- if translation.text is None:
+ for translation in translations:
+ has_translation = True
+ yield translation
+
+ if not has_translation:
raise TranslationFail()
-
- return translation
diff --git a/modules/wordreference/pages.py b/modules/wordreference/pages.py
index 252f8861..f028d49a 100644
--- a/modules/wordreference/pages.py
+++ b/modules/wordreference/pages.py
@@ -17,48 +17,22 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-from weboob.deprecated.browser import Page
-import re
+from weboob.browser.pages import HTMLPage
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.capabilities.translate import Translation
+from weboob.browser.filters.standard import CleanText, Regexp, Env
+from weboob.browser.filters.html import CleanHTML
-LAST_THING_IN_PARENTHESIS = re.compile("\([^)]\)$")
+class TranslatePage(HTMLPage):
+ @method
+ class get_translation(ListElement):
+ item_xpath = '//table[@class="WRD" and not(@id)]/tr[@id]'
+ class item(ItemElement):
+ klass = Translation
-class TranslatePage(Page):
- def get_translation(self):
- trs = self.document.getroot().xpath("//table[@class='WRD']/tr[@class='even']")
- if trs and len(trs) > 0:
- # taking the first signification in the case several were found
- return self.parser.select(trs[0], "td[@class='ToWrd']", 1, method='xpath').text
- """
- # taking the first signification in the case several were found
- for tr in self.document.getiterator('tr'):
- prev_was_nums1 = False
- for td in tr.getiterator('td'):
- if prev_was_nums1:
- result = u''+td.text_content().split(';')[0].strip()
- result = LAST_THING_IN_PARENTHESIS.sub("",result)
- return result
- if td.attrib.get('class','') == 'nums1':
- prev_was_nums1 = True
- # if only one signification is found
- for div in self.document.getiterator('div'):
- if div.attrib.get('class','') == "trans clickable":
- if ']' in div.text_content():
- tnames = div.text_content().split(']')[1].split()[1:]
- else:
- tnames = div.text_content().split()[1:]
- names = u''+" ".join(tnames).split(';')[0]
- names = LAST_THING_IN_PARENTHESIS.sub("",names)
- return names.strip()
- # another numerotation possibility...
- for table in self.document.getiterator('table'):
- if table.attrib.get('class','') == "trans clickable":
- prev_was_roman1 = False
- for td in table.getiterator('td'):
- if prev_was_roman1:
- return u''+td.text_content().split(';')[0].strip()
- if td.attrib.get('class','') == 'roman1':
- prev_was_roman1 = True
- """
+ obj_id = Regexp(CleanText('./@id'), '.*:(.*)')
+ obj_lang_src = Env('sl')
+ obj_lang_dst = Env('tl')
+ obj_text = CleanHTML('./td[@class="ToWrd"]')