[wordreference] make result cleaner
This commit is contained in:
parent
d5c391696d
commit
7432cfc465
1 changed files with 6 additions and 3 deletions
|
|
@ -19,9 +19,11 @@
|
||||||
|
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['TranslatePage']
|
__all__ = ['TranslatePage']
|
||||||
|
# Matches a trailing parenthesised group (of any length, including empty)
# together with the whitespace in front of it, e.g. the " (nm)" in
# "chien (nm)".  A raw string is used so that "\(" is a regex escape and
# not an (invalid) Python string escape.
LAST_THING_IN_PARENTHESIS = re.compile(r"\s*\([^)]*\)$")
|
||||||
|
|
||||||
|
|
||||||
class TranslatePage(BasePage):
|
class TranslatePage(BasePage):
|
||||||
|
|
@ -31,15 +33,16 @@ class TranslatePage(BasePage):
|
||||||
prev_was_nums1 = False
|
prev_was_nums1 = False
|
||||||
for td in tr.getiterator('td'):
|
for td in tr.getiterator('td'):
|
||||||
if prev_was_nums1:
|
if prev_was_nums1:
|
||||||
return u''+td.text_content().split(';')[0].strip()
|
result = u''+td.text_content().split(';')[0].strip()
|
||||||
|
result = LAST_THING_IN_PARENTHESIS.sub("",result)
|
||||||
|
return result
|
||||||
if td.attrib.get('class','') == 'nums1':
|
if td.attrib.get('class','') == 'nums1':
|
||||||
prev_was_nums1 = True
|
prev_was_nums1 = True
|
||||||
# if only one meaning is found
|
# if only one meaning is found
|
||||||
for div in self.document.getiterator('div'):
|
for div in self.document.getiterator('div'):
|
||||||
if div.attrib.get('class','') == "trans clickable":
|
if div.attrib.get('class','') == "trans clickable":
|
||||||
names = u''+" ".join(div.text_content().split(']')[1].split()[1:]).split(';')[0]
|
names = u''+" ".join(div.text_content().split(']')[1].split()[1:]).split(';')[0]
|
||||||
if ")" in names:
|
names = LAST_THING_IN_PARENTHESIS.sub("",names)
|
||||||
names = names.split(")")[1]
|
|
||||||
return names.strip()
|
return names.strip()
|
||||||
# another numbering possibility...
|
# another numbering possibility...
|
||||||
for table in self.document.getiterator('table'):
|
for table in self.document.getiterator('table'):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue