[wordreference] Adapt to browser2
This commit is contained in:
parent
8688e266b5
commit
a7684982f8
3 changed files with 34 additions and 72 deletions
|
|
@ -18,34 +18,22 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
import urllib
|
from weboob.browser import PagesBrowser, URL
|
||||||
|
|
||||||
from weboob.deprecated.browser import Browser
|
|
||||||
|
|
||||||
from .pages import TranslatePage
|
from .pages import TranslatePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['WordReferenceBrowser']
|
__all__ = ['WordReferenceBrowser']
|
||||||
|
|
||||||
|
|
||||||
class WordReferenceBrowser(Browser):
|
class WordReferenceBrowser(PagesBrowser):
|
||||||
DOMAIN = 'www.wordreference.com'
|
BASEURL = 'http://www.wordreference.com'
|
||||||
ENCODING = 'UTF-8'
|
translation_page = URL('(?P<sl>[a-z]{2})(?P<tl>[a-z]{2})/(?P<pattern>.*)', TranslatePage)
|
||||||
USER_AGENT = Browser.USER_AGENTS['desktop_firefox']
|
|
||||||
PAGES = {
|
|
||||||
'https?://www\.wordreference\.com/.*/.*': TranslatePage
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
Browser.__init__(self, *args, **kwargs)
|
|
||||||
|
|
||||||
def translate(self, source, to, text):
|
def translate(self, source, to, text):
|
||||||
"""
|
"""
|
||||||
translate 'text' from 'source' language to 'to' language
|
translate 'text' from 'source' language to 'to' language
|
||||||
"""
|
"""
|
||||||
sl = source.encode('utf-8')
|
|
||||||
tl = to.encode('utf-8')
|
return self.translation_page.go(sl=source.encode('utf-8'),
|
||||||
text = text.encode('utf-8')
|
tl=to.encode('utf-8'),
|
||||||
self.location('http://'+self.DOMAIN+'/'+sl+tl+'/'+urllib.quote(text))
|
pattern=text.encode('utf-8')).get_translation()
|
||||||
translation = self.page.get_translation()
|
|
||||||
return translation
|
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@
|
||||||
"backend for http://www.wordreference.com"
|
"backend for http://www.wordreference.com"
|
||||||
|
|
||||||
|
|
||||||
from weboob.capabilities.translate import CapTranslate, Translation, TranslationFail, LanguageNotSupported
|
from weboob.capabilities.translate import CapTranslate, TranslationFail, LanguageNotSupported
|
||||||
from weboob.tools.backend import Module
|
from weboob.tools.backend import Module
|
||||||
|
|
||||||
from .browser import WordReferenceBrowser
|
from .browser import WordReferenceBrowser
|
||||||
|
|
@ -37,9 +37,9 @@ class WordReferenceModule(Module, CapTranslate):
|
||||||
DESCRIPTION = u'Free online translator'
|
DESCRIPTION = u'Free online translator'
|
||||||
BROWSER = WordReferenceBrowser
|
BROWSER = WordReferenceBrowser
|
||||||
WRLANGUAGE = {
|
WRLANGUAGE = {
|
||||||
'Arabic':'ar', 'Chinese':'zh', 'Czech':'cz', 'English':'en', 'French':'fr', 'Greek':'gr',
|
'Arabic': 'ar', 'Chinese': 'zh', 'Czech': 'cz', 'English': 'en', 'French': 'fr', 'Greek': 'gr',
|
||||||
'Italian':'it', 'Japanese':'ja', 'Korean':'ko', 'Polish':'pl', 'Portuguese':'pt',
|
'Italian': 'it', 'Japanese': 'ja', 'Korean': 'ko', 'Polish': 'pl', 'Portuguese': 'pt',
|
||||||
'Romanian':'ro', 'Spanish':'es', 'Turkish':'tr',
|
'Romanian': 'ro', 'Spanish': 'es', 'Turkish': 'tr',
|
||||||
}
|
}
|
||||||
|
|
||||||
def translate(self, lan_from, lan_to, text):
|
def translate(self, lan_from, lan_to, text):
|
||||||
|
|
@ -49,12 +49,12 @@ class WordReferenceModule(Module, CapTranslate):
|
||||||
if lan_to not in self.WRLANGUAGE.keys():
|
if lan_to not in self.WRLANGUAGE.keys():
|
||||||
raise LanguageNotSupported()
|
raise LanguageNotSupported()
|
||||||
|
|
||||||
translation = Translation(0)
|
translations = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
|
||||||
translation.lang_src = unicode(self.WRLANGUAGE[lan_from])
|
has_translation = False
|
||||||
translation.lang_dst = unicode(self.WRLANGUAGE[lan_to])
|
|
||||||
translation.text = self.browser.translate(self.WRLANGUAGE[lan_from], self.WRLANGUAGE[lan_to], text)
|
|
||||||
|
|
||||||
if translation.text is None:
|
for translation in translations:
|
||||||
|
has_translation = True
|
||||||
|
yield translation
|
||||||
|
|
||||||
|
if not has_translation:
|
||||||
raise TranslationFail()
|
raise TranslationFail()
|
||||||
|
|
||||||
return translation
|
|
||||||
|
|
|
||||||
|
|
@ -17,48 +17,22 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from weboob.browser.pages import HTMLPage
|
||||||
from weboob.deprecated.browser import Page
|
from weboob.browser.elements import ItemElement, ListElement, method
|
||||||
import re
|
from weboob.capabilities.translate import Translation
|
||||||
|
from weboob.browser.filters.standard import CleanText, Regexp, Env
|
||||||
|
from weboob.browser.filters.html import CleanHTML
|
||||||
|
|
||||||
|
|
||||||
LAST_THING_IN_PARENTHESIS = re.compile("\([^)]\)$")
|
class TranslatePage(HTMLPage):
|
||||||
|
@method
|
||||||
|
class get_translation(ListElement):
|
||||||
|
item_xpath = '//table[@class="WRD" and not(@id)]/tr[@id]'
|
||||||
|
|
||||||
|
class item(ItemElement):
|
||||||
|
klass = Translation
|
||||||
|
|
||||||
class TranslatePage(Page):
|
obj_id = Regexp(CleanText('./@id'), '.*:(.*)')
|
||||||
def get_translation(self):
|
obj_lang_src = Env('sl')
|
||||||
trs = self.document.getroot().xpath("//table[@class='WRD']/tr[@class='even']")
|
obj_lang_dst = Env('tl')
|
||||||
if trs and len(trs) > 0:
|
obj_text = CleanHTML('./td[@class="ToWrd"]')
|
||||||
# taking the first meaning when several were found
|
|
||||||
return self.parser.select(trs[0], "td[@class='ToWrd']", 1, method='xpath').text
|
|
||||||
"""
|
|
||||||
# taking the first meaning when several were found
|
|
||||||
for tr in self.document.getiterator('tr'):
|
|
||||||
prev_was_nums1 = False
|
|
||||||
for td in tr.getiterator('td'):
|
|
||||||
if prev_was_nums1:
|
|
||||||
result = u''+td.text_content().split(';')[0].strip()
|
|
||||||
result = LAST_THING_IN_PARENTHESIS.sub("",result)
|
|
||||||
return result
|
|
||||||
if td.attrib.get('class','') == 'nums1':
|
|
||||||
prev_was_nums1 = True
|
|
||||||
# if only one meaning is found
|
|
||||||
for div in self.document.getiterator('div'):
|
|
||||||
if div.attrib.get('class','') == "trans clickable":
|
|
||||||
if ']' in div.text_content():
|
|
||||||
tnames = div.text_content().split(']')[1].split()[1:]
|
|
||||||
else:
|
|
||||||
tnames = div.text_content().split()[1:]
|
|
||||||
names = u''+" ".join(tnames).split(';')[0]
|
|
||||||
names = LAST_THING_IN_PARENTHESIS.sub("",names)
|
|
||||||
return names.strip()
|
|
||||||
# another possible numbering scheme...
|
|
||||||
for table in self.document.getiterator('table'):
|
|
||||||
if table.attrib.get('class','') == "trans clickable":
|
|
||||||
prev_was_roman1 = False
|
|
||||||
for td in table.getiterator('td'):
|
|
||||||
if prev_was_roman1:
|
|
||||||
return u''+td.text_content().split(';')[0].strip()
|
|
||||||
if td.attrib.get('class','') == 'roman1':
|
|
||||||
prev_was_roman1 = True
|
|
||||||
"""
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue