[wordreference] works now, dirty fix for a dirty website
This commit is contained in:
parent
c977a6a2fe
commit
d5c391696d
1 changed files with 23 additions and 4 deletions
|
|
@ -26,9 +26,28 @@ __all__ = ['TranslatePage']
|
|||
|
||||
class TranslatePage(BasePage):
    """Page holding the result of a translation lookup.

    ``get_translation`` tries, in order, the three result layouts this
    parser knows about and returns the first translation it finds.
    """

    @staticmethod
    def _first_meaning(node):
        """Return the text content of *node* up to the first ';', stripped."""
        return u'' + node.text_content().split(';')[0].strip()

    def _cell_after_marker(self, parent, marker_class):
        """Return the first meaning found in the cell following a marker cell.

        Walks the ``td`` elements yielded by ``parent.getiterator('td')`` and
        returns the text of the cell that comes right after the first one
        whose ``class`` attribute equals *marker_class*.

        Returns ``None`` when no marker cell is found, or when the marker is
        the last cell under *parent*.
        """
        # The flag is local to this call, so one lookup can never leak its
        # state into another one (the previous code shared a single flag
        # between two unrelated loops).
        marker_seen = False
        for td in parent.getiterator('td'):
            if marker_seen:
                return self._first_meaning(td)
            if td.attrib.get('class', '') == marker_class:
                marker_seen = True
        return None

    def get_translation(self):
        """Return the first translation found in the document.

        Returns a unicode string, or ``None`` when none of the known layouts
        matches.
        """
        # taking the first signification in the case several were found:
        # the translation is the cell following a 'nums1' cell of the same row
        # (presumably 'nums1' marks the numbering cell -- TODO confirm)
        for tr in self.document.getiterator('tr'):
            translation = self._cell_after_marker(tr, 'nums1')
            if translation is not None:
                return translation

        # if only one signification is found
        for div in self.document.getiterator('div'):
            if div.attrib.get('class', '') == "trans clickable":
                # keep what follows the first ']', drop its first word and
                # cut at the first ';'
                names = u'' + " ".join(div.text_content().split(']')[1].split()[1:]).split(';')[0]
                if ")" in names:
                    # keep the piece that follows the first ')'
                    names = names.split(")")[1]
                return names.strip()

        # another numerotation possibility: same idea as the first lookup,
        # but the marker cell has class 'roman1' and lives in a
        # "trans clickable" table
        for table in self.document.getiterator('table'):
            if table.attrib.get('class', '') == "trans clickable":
                translation = self._cell_after_marker(table, 'roman1')
                if translation is not None:
                    return translation

        return None
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue