[wordreference] Adapt to browser2

2014-10-23 17:15:17 +02:00 · 2014-10-23 17:15:17 +02:00 · a7684982f8
commit a7684982f8
parent 8688e266b5
3 changed files with 34 additions and 72 deletions
--- a/modules/wordreference/browser.py
+++ b/modules/wordreference/browser.py
@@ -18,34 +18,22 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
-import urllib
+from weboob.browser import PagesBrowser, URL
 from weboob.deprecated.browser import Browser
 from .pages import TranslatePage
 __all__ = ['WordReferenceBrowser']
-class WordReferenceBrowser(Browser):
+class WordReferenceBrowser(PagesBrowser):
-    DOMAIN = 'www.wordreference.com'
+    BASEURL = 'http://www.wordreference.com'
-    ENCODING = 'UTF-8'
+    translation_page = URL('(?P<sl>[a-z]{2})(?P<tl>[a-z]{2})/(?P<pattern>.*)', TranslatePage)
    USER_AGENT = Browser.USER_AGENTS['desktop_firefox']
    PAGES = {
        'https?://www\.wordreference\.com/.*/.*': TranslatePage
        }
    def __init__(self, *args, **kwargs):
        Browser.__init__(self, *args, **kwargs)
    def translate(self, source, to, text):
        """
        translate 'text' from 'source' language to 'to' language
        """
-        sl   = source.encode('utf-8')
+
-        tl   = to.encode('utf-8')
+        return self.translation_page.go(sl=source.encode('utf-8'),
-        text = text.encode('utf-8')
+                                        tl=to.encode('utf-8'),
-        self.location('http://'+self.DOMAIN+'/'+sl+tl+'/'+urllib.quote(text))
+                                        pattern=text.encode('utf-8')).get_translation()
        translation = self.page.get_translation()
        return translation
--- a/modules/wordreference/module.py
+++ b/modules/wordreference/module.py
@@ -19,7 +19,7 @@
 "backend for http://www.wordreference.com"
-from weboob.capabilities.translate import CapTranslate, Translation, TranslationFail, LanguageNotSupported
+from weboob.capabilities.translate import CapTranslate, TranslationFail, LanguageNotSupported
 from weboob.tools.backend import Module
 from .browser import WordReferenceBrowser
@@ -37,9 +37,9 @@ class WordReferenceModule(Module, CapTranslate):
    DESCRIPTION = u'Free online translator'
    BROWSER = WordReferenceBrowser
    WRLANGUAGE = {
-        'Arabic':'ar', 'Chinese':'zh', 'Czech':'cz', 'English':'en', 'French':'fr', 'Greek':'gr',
+        'Arabic': 'ar', 'Chinese': 'zh', 'Czech': 'cz', 'English': 'en', 'French': 'fr', 'Greek': 'gr',
-        'Italian':'it', 'Japanese':'ja', 'Korean':'ko', 'Polish':'pl', 'Portuguese':'pt',
+        'Italian': 'it', 'Japanese': 'ja', 'Korean': 'ko', 'Polish': 'pl', 'Portuguese': 'pt',
-        'Romanian':'ro', 'Spanish':'es', 'Turkish':'tr',
+        'Romanian': 'ro', 'Spanish': 'es', 'Turkish': 'tr',
        }
    def translate(self, lan_from, lan_to, text):
@@ -49,12 +49,12 @@ class WordReferenceModule(Module, CapTranslate):
        if lan_to not in self.WRLANGUAGE.keys():
            raise LanguageNotSupported()
-        translation = Translation(0)
+        translations = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
-        translation.lang_src = unicode(self.WRLANGUAGE[lan_from])
+        has_translation = False
        translation.lang_dst = unicode(self.WRLANGUAGE[lan_to])
        translation.text = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
-        if translation.text is None:
+        for translation in translations:
            has_translation = True
            yield translation
        if not has_translation:
            raise TranslationFail()
        return translation
--- a/modules/wordreference/pages.py
+++ b/modules/wordreference/pages.py
@@ -17,48 +17,22 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
+from weboob.browser.pages import HTMLPage
-from weboob.deprecated.browser import Page
+from weboob.browser.elements import ItemElement, ListElement, method
-import re
+from weboob.capabilities.translate import Translation
 from weboob.browser.filters.standard import CleanText, Regexp, Env
 from weboob.browser.filters.html import CleanHTML
-LAST_THING_IN_PARENTHESIS = re.compile("\([^)]\)$")
+class TranslatePage(HTMLPage):
    @method
    class get_translation(ListElement):
        item_xpath = '//table[@class="WRD" and not(@id)]/tr[@id]'
        class item(ItemElement):
            klass = Translation
-class TranslatePage(Page):
+            obj_id = Regexp(CleanText('./@id'), '.*:(.*)')
-    def get_translation(self):
+            obj_lang_src = Env('sl')
-        trs = self.document.getroot().xpath("//table[@class='WRD']/tr[@class='even']")
+            obj_lang_dst = Env('tl')
-        if trs and len(trs) > 0:
+            obj_text = CleanHTML('./td[@class="ToWrd"]')
            # taking the first signification in the case several were found
            return self.parser.select(trs[0], "td[@class='ToWrd']", 1, method='xpath').text
        """
        # taking the first signification in the case several were found
        for tr in self.document.getiterator('tr'):
            prev_was_nums1 = False
            for td in tr.getiterator('td'):
                if prev_was_nums1:
                    result = u''+td.text_content().split(';')[0].strip()
                    result = LAST_THING_IN_PARENTHESIS.sub("",result)
                    return result
                if td.attrib.get('class','') == 'nums1':
                    prev_was_nums1 = True
        # if only one signification is found
        for div in self.document.getiterator('div'):
            if div.attrib.get('class','') == "trans clickable":
                if ']' in div.text_content():
                    tnames = div.text_content().split(']')[1].split()[1:]
                else:
                    tnames = div.text_content().split()[1:]
                names = u''+" ".join(tnames).split(';')[0]
                names = LAST_THING_IN_PARENTHESIS.sub("",names)
                return names.strip()
        # another numerotation possibility...
        for table in self.document.getiterator('table'):
            if table.attrib.get('class','') == "trans clickable":
                prev_was_roman1 = False
                for td in table.getiterator('td'):
                    if prev_was_roman1:
                        return u''+td.text_content().split(';')[0].strip()
                    if td.attrib.get('class','') == 'roman1':
                        prev_was_roman1 = True
        """