[wordreference] works now, dirty fix for a dirty website
This commit is contained in:
parent
c977a6a2fe
commit
d5c391696d
1 changed files with 23 additions and 4 deletions
|
|
@ -26,9 +26,28 @@ __all__ = ['TranslatePage']
|
||||||
|
|
||||||
class TranslatePage(BasePage):
    def get_translation(self):
        """Return the first translation found in the parsed page.

        Three page layouts are tried in order; the first one that matches
        wins:

        1. a ``<td>`` that directly follows a ``<td class="nums1">`` inside
           the same ``<tr>`` (several meanings listed);
        2. a ``<div class="trans clickable">`` (a single meaning);
        3. a ``<td>`` that directly follows a ``<td class="roman1">`` inside
           a ``<table class="trans clickable">`` (alternative numbering).

        In every case only the text before the first ``;`` is kept and
        surrounding whitespace is stripped.  If no layout matches, the
        method falls off the end and returns ``None``.
        """
        # Layout 1: several meanings were found, take the first one.  The
        # flag is reset for every row so that a "nums1" cell only pairs with
        # a following cell of the same row.
        for tr in self.document.getiterator('tr'):
            prev_was_nums1 = False
            for td in tr.getiterator('td'):
                if prev_was_nums1:
                    return u'' + td.text_content().split(';')[0].strip()
                if td.attrib.get('class', '') == 'nums1':
                    prev_was_nums1 = True

        # Layout 2: only one meaning was found.
        for div in self.document.getiterator('div'):
            if div.attrib.get('class', '') == "trans clickable":
                # Keep what follows the first ']', drop the first
                # whitespace-separated token, then cut at the first ';'.
                # NOTE(review): this assumes the block text contains a ']';
                # otherwise the indexing raises IndexError - confirm against
                # real pages.
                names = u'' + " ".join(div.text_content().split(']')[1].split()[1:]).split(';')[0]
                if ")" in names:
                    # Skip a leading parenthesised part.
                    names = names.split(")")[1]
                return names.strip()

        # Layout 3: another numbering possibility ("roman1" cells).
        for table in self.document.getiterator('table'):
            if table.attrib.get('class', '') == "trans clickable":
                # This loop owns its flag.  It previously initialised
                # prev_was_roman1 but tested and set prev_was_nums1, reading
                # stale state from layout 1 (or raising NameError when the
                # page had no <tr> at all).
                prev_was_roman1 = False
                for td in table.getiterator('td'):
                    if prev_was_roman1:
                        return u'' + td.text_content().split(';')[0].strip()
                    if td.attrib.get('class', '') == 'roman1':
                        prev_was_roman1 = True
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue