def get_translation(self):
    """Return the first translation found in the parsed page, or None.

    Three page layouts are tried in order on ``self.document``:

    1. Several meanings: in each ``<tr>``, the ``<td>`` that directly
       follows a ``<td class="nums1">`` holds the first meaning.
    2. A single meaning: a ``<div class="trans clickable">`` whose text
       has the translation after the first ``]`` and one leading token.
    3. Alternative numbering: inside a ``<table class="trans clickable">``,
       the ``<td>`` that directly follows a ``<td class="roman1">``.

    Only the part before the first ``;`` is kept, stripped of surrounding
    whitespace. ``None`` is returned when no layout matches.
    """
    def text_after_marker(cells, marker_class):
        # Cleaned text of the cell right after the first cell whose class
        # is ``marker_class``; None if the marker is absent or is last.
        marker_seen = False
        for cell in cells:
            if marker_seen:
                return u'' + cell.text_content().split(';')[0].strip()
            if cell.attrib.get('class', '') == marker_class:
                marker_seen = True
        return None

    # Layout 1: several meanings were found, take the first one.
    for row in self.document.getiterator('tr'):
        found = text_after_marker(row.getiterator('td'), 'nums1')
        if found is not None:
            return found

    # Layout 2: only one meaning was found.
    for div in self.document.getiterator('div'):
        if div.attrib.get('class', '') != "trans clickable":
            continue
        pieces = div.text_content().split(']')
        if len(pieces) < 2:
            # No "]" in the text: not the expected shape, so skip this div
            # instead of failing with IndexError.
            continue
        # Drop the first whitespace-separated token after "]", then keep
        # what precedes the first ";".
        names = u'' + " ".join(pieces[1].split()[1:]).split(';')[0]
        if ")" in names:
            # Keep only what follows a leading parenthesised note.
            names = names.split(")")[1]
        return names.strip()

    # Layout 3: another numbering scheme (roman numerals). The marker state
    # is local to each table, so nothing leaks in from the row scan above.
    for table in self.document.getiterator('table'):
        if table.attrib.get('class', '') == "trans clickable":
            found = text_after_marker(table.getiterator('td'), 'roman1')
            if found is not None:
                return found

    return None