From 7432cfc46573585eb11872a5746eb18ae7d52d9b Mon Sep 17 00:00:00 2001
From: Julien Veyssier
Date: Sun, 10 Feb 2013 19:04:45 +0100
Subject: [PATCH] [wordreference] make result cleaner

Strip a trailing parenthesised annotation from the translation that
TranslatePage returns, using one module-level compiled pattern
(LAST_THING_IN_PARENTHESIS) in both places that build the result:

  * the cell following a 'nums1' cell (several significations found);
  * the 'trans clickable' div (only one signification found), where it
    replaces the previous split on ")".

---
Review notes (ignored by git-am, not part of the commit message):

  * The pattern was "\([^)]\)$", which only matches a parenthesised
    group containing exactly one character, so "word (noun)" was left
    unchanged. It is now r"\([^)]*\)$" (any number of non-")" characters,
    written as a raw string).
  * The 'nums1' path stripped the text before the substitution, leaving a
    trailing space once "(...)" was removed; it now strips the final
    result, like the 'trans clickable' path already does.
  * Line breaks, blank context lines and indentation were reconstructed
    from a whitespace-collapsed copy of this patch using the hunk line
    counts. Verify against the tree before applying.

 modules/wordreference/pages.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/modules/wordreference/pages.py b/modules/wordreference/pages.py
index b9994766..1b7737fd 100644
--- a/modules/wordreference/pages.py
+++ b/modules/wordreference/pages.py
@@ -19,9 +19,11 @@
 
 
 from weboob.tools.browser import BasePage
+import re
 
 
 __all__ = ['TranslatePage']
+LAST_THING_IN_PARENTHESIS = re.compile(r"\([^)]*\)$")
 
 
 class TranslatePage(BasePage):
@@ -31,15 +33,16 @@ class TranslatePage(BasePage):
             prev_was_nums1 = False
             for td in tr.getiterator('td'):
                 if prev_was_nums1:
-                    return u''+td.text_content().split(';')[0].strip()
+                    result = u''+td.text_content().split(';')[0].strip()
+                    result = LAST_THING_IN_PARENTHESIS.sub("",result)
+                    return result.strip()
                 if td.attrib.get('class','') == 'nums1':
                     prev_was_nums1 = True
         # if only one signification is found
         for div in self.document.getiterator('div'):
             if div.attrib.get('class','') == "trans clickable":
                 names = u''+" ".join(div.text_content().split(']')[1].split()[1:]).split(';')[0]
-                if ")" in names:
-                    names = names.split(")")[1]
+                names = LAST_THING_IN_PARENTHESIS.sub("",names)
                 return names.strip()
         # another numerotation possibility...
         for table in self.document.getiterator('table'):