[wordreference] Adapt to browser2

This commit is contained in:
Bezleputh 2014-10-23 17:15:17 +02:00 committed by Romain Bignon
commit a7684982f8
3 changed files with 34 additions and 72 deletions

View file

@ -18,34 +18,22 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib from weboob.browser import PagesBrowser, URL
from weboob.deprecated.browser import Browser
from .pages import TranslatePage from .pages import TranslatePage
__all__ = ['WordReferenceBrowser'] __all__ = ['WordReferenceBrowser']
class WordReferenceBrowser(Browser): class WordReferenceBrowser(PagesBrowser):
DOMAIN = 'www.wordreference.com' BASEURL = 'http://www.wordreference.com'
ENCODING = 'UTF-8' translation_page = URL('(?P<sl>[a-z]{2})(?P<tl>[a-z]{2})/(?P<pattern>.*)', TranslatePage)
USER_AGENT = Browser.USER_AGENTS['desktop_firefox']
PAGES = {
'https?://www\.wordreference\.com/.*/.*': TranslatePage
}
def __init__(self, *args, **kwargs):
Browser.__init__(self, *args, **kwargs)
def translate(self, source, to, text): def translate(self, source, to, text):
""" """
translate 'text' from 'source' language to 'to' language translate 'text' from 'source' language to 'to' language
""" """
sl = source.encode('utf-8')
tl = to.encode('utf-8') return self.translation_page.go(sl=source.encode('utf-8'),
text = text.encode('utf-8') tl=to.encode('utf-8'),
self.location('http://'+self.DOMAIN+'/'+sl+tl+'/'+urllib.quote(text)) pattern=text.encode('utf-8')).get_translation()
translation = self.page.get_translation()
return translation

View file

@ -19,7 +19,7 @@
"backend for http://www.wordreference.com" "backend for http://www.wordreference.com"
from weboob.capabilities.translate import CapTranslate, Translation, TranslationFail, LanguageNotSupported from weboob.capabilities.translate import CapTranslate, TranslationFail, LanguageNotSupported
from weboob.tools.backend import Module from weboob.tools.backend import Module
from .browser import WordReferenceBrowser from .browser import WordReferenceBrowser
@ -37,9 +37,9 @@ class WordReferenceModule(Module, CapTranslate):
DESCRIPTION = u'Free online translator' DESCRIPTION = u'Free online translator'
BROWSER = WordReferenceBrowser BROWSER = WordReferenceBrowser
WRLANGUAGE = { WRLANGUAGE = {
'Arabic':'ar', 'Chinese':'zh', 'Czech':'cz', 'English':'en', 'French':'fr', 'Greek':'gr', 'Arabic': 'ar', 'Chinese': 'zh', 'Czech': 'cz', 'English': 'en', 'French': 'fr', 'Greek': 'gr',
'Italian':'it', 'Japanese':'ja', 'Korean':'ko', 'Polish':'pl', 'Portuguese':'pt', 'Italian': 'it', 'Japanese': 'ja', 'Korean': 'ko', 'Polish': 'pl', 'Portuguese': 'pt',
'Romanian':'ro', 'Spanish':'es', 'Turkish':'tr', 'Romanian': 'ro', 'Spanish': 'es', 'Turkish': 'tr',
} }
def translate(self, lan_from, lan_to, text): def translate(self, lan_from, lan_to, text):
@ -49,12 +49,12 @@ class WordReferenceModule(Module, CapTranslate):
if lan_to not in self.WRLANGUAGE.keys(): if lan_to not in self.WRLANGUAGE.keys():
raise LanguageNotSupported() raise LanguageNotSupported()
translation = Translation(0) translations = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
translation.lang_src = unicode(self.WRLANGUAGE[lan_from]) has_translation = False
translation.lang_dst = unicode(self.WRLANGUAGE[lan_to])
translation.text = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
if translation.text is None: for translation in translations:
has_translation = True
yield translation
if not has_translation:
raise TranslationFail() raise TranslationFail()
return translation

View file

@ -17,48 +17,22 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.pages import HTMLPage
from weboob.deprecated.browser import Page from weboob.browser.elements import ItemElement, ListElement, method
import re from weboob.capabilities.translate import Translation
from weboob.browser.filters.standard import CleanText, Regexp, Env
from weboob.browser.filters.html import CleanHTML
LAST_THING_IN_PARENTHESIS = re.compile("\([^)]\)$") class TranslatePage(HTMLPage):
@method
class get_translation(ListElement):
item_xpath = '//table[@class="WRD" and not(@id)]/tr[@id]'
class item(ItemElement):
klass = Translation
class TranslatePage(Page): obj_id = Regexp(CleanText('./@id'), '.*:(.*)')
def get_translation(self): obj_lang_src = Env('sl')
trs = self.document.getroot().xpath("//table[@class='WRD']/tr[@class='even']") obj_lang_dst = Env('tl')
if trs and len(trs) > 0: obj_text = CleanHTML('./td[@class="ToWrd"]')
# take the first meaning when several were found
return self.parser.select(trs[0], "td[@class='ToWrd']", 1, method='xpath').text
"""
# take the first meaning when several were found
for tr in self.document.getiterator('tr'):
prev_was_nums1 = False
for td in tr.getiterator('td'):
if prev_was_nums1:
result = u''+td.text_content().split(';')[0].strip()
result = LAST_THING_IN_PARENTHESIS.sub("",result)
return result
if td.attrib.get('class','') == 'nums1':
prev_was_nums1 = True
# if only one meaning is found
for div in self.document.getiterator('div'):
if div.attrib.get('class','') == "trans clickable":
if ']' in div.text_content():
tnames = div.text_content().split(']')[1].split()[1:]
else:
tnames = div.text_content().split()[1:]
names = u''+" ".join(tnames).split(';')[0]
names = LAST_THING_IN_PARENTHESIS.sub("",names)
return names.strip()
# another possible numbering scheme...
for table in self.document.getiterator('table'):
if table.attrib.get('class','') == "trans clickable":
prev_was_roman1 = False
for td in table.getiterator('td'):
if prev_was_roman1:
return u''+td.text_content().split(';')[0].strip()
if td.attrib.get('class','') == 'roman1':
prev_was_roman1 = True
"""