[wordreference] Adapt to browser2

This commit is contained in:
Bezleputh 2014-10-23 17:15:17 +02:00 committed by Romain Bignon
commit a7684982f8
3 changed files with 34 additions and 72 deletions

View file

@ -17,48 +17,22 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.deprecated.browser import Page
import re
from weboob.browser.pages import HTMLPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.capabilities.translate import Translation
from weboob.browser.filters.standard import CleanText, Regexp, Env
from weboob.browser.filters.html import CleanHTML
# Matches a trailing parenthesised group, e.g. the "(noun)" in "cat (noun)".
# A raw string is used so that "\(" and "\)" are not treated as (invalid)
# string escapes, and the "*" quantifier lets the group contain any number of
# characters (the previous "[^)]" only matched exactly one character).
LAST_THING_IN_PARENTHESIS = re.compile(r"\([^)]*\)$")
# NOTE(review): indentation was lost in this rendering of the diff. These lines
# appear to be the new, declarative version of TranslatePage built on HTMLPage;
# confirm the nesting (class -> @method class -> item class) against the repo.
class TranslatePage(HTMLPage):
@method
class get_translation(ListElement):
# Selects the <tr> elements that carry an id attribute, inside <table>
# elements whose class is "WRD" and that have no id attribute themselves.
item_xpath = '//table[@class="WRD" and not(@id)]/tr[@id]'
class item(ItemElement):
# Presumably the object type built for each row matched by item_xpath.
klass = Translation
# NOTE(review): this looks like the previous implementation, based on the
# deprecated Page class (see the `weboob.deprecated.browser` import); the lines
# are shown without their original indentation -- confirm against the repo.
class TranslatePage(Page):
# Looks up the translation rows and returns the text of the "ToWrd" cell of
# the first one; nothing is explicitly returned when no row matches.
def get_translation(self):
# Rows of class "even" inside tables of class "WRD".
trs = self.document.getroot().xpath("//table[@class='WRD']/tr[@class='even']")
if trs and len(trs) > 0:
# taking the first signification in the case several were found
return self.parser.select(trs[0], "td[@class='ToWrd']", 1, method='xpath').text
# The triple-quoted string below is never assigned or returned: it holds older
# parsing strategies kept only as an inert string literal (dead code).
"""
# taking the first signification in the case several were found
for tr in self.document.getiterator('tr'):
prev_was_nums1 = False
for td in tr.getiterator('td'):
if prev_was_nums1:
result = u''+td.text_content().split(';')[0].strip()
result = LAST_THING_IN_PARENTHESIS.sub("",result)
return result
if td.attrib.get('class','') == 'nums1':
prev_was_nums1 = True
# if only one signification is found
for div in self.document.getiterator('div'):
if div.attrib.get('class','') == "trans clickable":
if ']' in div.text_content():
tnames = div.text_content().split(']')[1].split()[1:]
else:
tnames = div.text_content().split()[1:]
names = u''+" ".join(tnames).split(';')[0]
names = LAST_THING_IN_PARENTHESIS.sub("",names)
return names.strip()
# another numerotation possibility...
for table in self.document.getiterator('table'):
if table.attrib.get('class','') == "trans clickable":
prev_was_roman1 = False
for td in table.getiterator('td'):
if prev_was_roman1:
return u''+td.text_content().split(';')[0].strip()
if td.attrib.get('class','') == 'roman1':
prev_was_roman1 = True
"""
# NOTE(review): these attributes presumably belong to the `item` ItemElement of
# the HTMLPage-based TranslatePage; indentation was lost in this rendering.
# Id: the part of the row's @id attribute that follows the last ':' (the
# leading ".*:" is greedy, so the capture group starts after the final colon).
obj_id = Regexp(CleanText('./@id'), '.*:(.*)')
# Source and destination languages come from the 'sl' and 'tl' Env entries;
# presumably these are supplied by the caller -- verify where they are set.
obj_lang_src = Env('sl')
obj_lang_dst = Env('tl')
# Translated text is taken from the row's <td> whose class is "ToWrd".
obj_text = CleanHTML('./td[@class="ToWrd"]')