From d5c391696d1fef94a0e2f33721bfa2d820f32dad Mon Sep 17 00:00:00 2001
From: Julien Veyssier
Date: Sun, 10 Feb 2013 18:39:51 +0100
Subject: [PATCH] [wordreference] works now, dirty fix for a dirty website

---
 modules/wordreference/pages.py | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/modules/wordreference/pages.py b/modules/wordreference/pages.py
index d1426db9..b9994766 100644
--- a/modules/wordreference/pages.py
+++ b/modules/wordreference/pages.py
@@ -26,9 +26,33 @@ __all__ = ['TranslatePage']
 
 class TranslatePage(BasePage):
     def get_translation(self):
+        """Return the first translation found in the page, or None.
+
+        The markup differs between result pages, so three layouts are
+        tried in turn; the result is truncated at the first ';'.
+        """
+        # several meanings listed: take the cell following the first 'nums1' cell
         for tr in self.document.getiterator('tr'):
-            if tr.attrib.get('class','') == 'odd' or tr.attrib.get('class','') == 'even':
-                for td in tr.getiterator('td'):
-                    if td.attrib.get('class','') == 'ToWrd':
-                        return u''+td.text
+            prev_was_nums1 = False
+            for td in tr.getiterator('td'):
+                if prev_was_nums1:
+                    return u''+td.text_content().split(';')[0].strip()
+                if td.attrib.get('class','') == 'nums1':
+                    prev_was_nums1 = True
+        # a single meaning: it is held in a 'trans clickable' div
+        for div in self.document.getiterator('div'):
+            if div.attrib.get('class','') == "trans clickable":
+                names = u''+" ".join(div.text_content().split(']')[1].split()[1:]).split(';')[0]
+                if ")" in names:
+                    names = names.split(")")[1]
+                return names.strip()
+        # alternative numbering: cell following a 'roman1' cell in a 'trans clickable' table
+        for table in self.document.getiterator('table'):
+            if table.attrib.get('class','') == "trans clickable":
+                prev_was_roman1 = False
+                for td in table.getiterator('td'):
+                    if prev_was_roman1:
+                        return u''+td.text_content().split(';')[0].strip()
+                    if td.attrib.get('class','') == 'roman1':
+                        prev_was_roman1 = True
 