Change the TableElement API to declare columns as col_* class attributes instead of a `columns` dict
This commit is contained in:
parent
a9afcb685d
commit
80d3f693e8
2 changed files with 21 additions and 7 deletions
|
|
@@ -89,9 +89,9 @@ class TableCell(_Filter):
|
||||||
class table(TableElement):
|
class table(TableElement):
|
||||||
head_xpath = '//table/thead/th'
|
head_xpath = '//table/thead/th'
|
||||||
item_xpath = '//table/tbody/tr'
|
item_xpath = '//table/tbody/tr'
|
||||||
columns = {'date': u'Date',
|
|
||||||
'label': [u'Name', 'Label'],
|
col_date = u'Date'
|
||||||
}
|
col_label = [u'Name', u'Label']
|
||||||
|
|
||||||
class item(ItemElement):
|
class item(ItemElement):
|
||||||
klass = Object
|
klass = Object
|
||||||
|
|
@@ -99,15 +99,19 @@ class TableCell(_Filter):
|
||||||
obj_label = CleanText(TableCell('label'))
|
obj_label = CleanText(TableCell('label'))
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *names):
|
def __init__(self, *names, **kwargs):
|
||||||
super(TableCell, self).__init__()
|
super(TableCell, self).__init__()
|
||||||
self.names = names
|
self.names = names
|
||||||
|
self.default = kwargs.pop('default', None)
|
||||||
|
|
||||||
def __call__(self, item):
|
def __call__(self, item):
|
||||||
for name in self.names:
|
for name in self.names:
|
||||||
idx = item.parent.get_colnum(name)
|
idx = item.parent.get_colnum(name)
|
||||||
if idx is not None:
|
if idx is not None:
|
||||||
return item.xpath('./td[%s]' % (idx + 1))
|
return item.xpath('./td[%s]' % (idx + 1))
|
||||||
|
|
||||||
|
if self.default is not None:
|
||||||
|
return self.default
|
||||||
raise KeyError('Unable to find column %s' % ' or '.join(self.names))
|
raise KeyError('Unable to find column %s' % ' or '.join(self.names))
|
||||||
|
|
||||||
class CleanText(Filter):
|
class CleanText(Filter):
|
||||||
|
|
|
||||||
|
|
@@ -475,12 +475,17 @@ class _ItemElementMeta(type):
|
||||||
Private meta-class used to keep order of obj_* attributes in ItemElement.
|
Private meta-class used to keep order of obj_* attributes in ItemElement.
|
||||||
"""
|
"""
|
||||||
def __new__(cls, name, bases, attrs):
|
def __new__(cls, name, bases, attrs):
|
||||||
|
_attrs = []
|
||||||
|
for base in bases:
|
||||||
|
if hasattr(base, '_attrs'):
|
||||||
|
_attrs += base._attrs
|
||||||
|
|
||||||
filters = [(re.sub('^obj_', '', attr_name), attrs[attr_name]) for attr_name, obj in attrs.items() if attr_name.startswith('obj_')]
|
filters = [(re.sub('^obj_', '', attr_name), attrs[attr_name]) for attr_name, obj in attrs.items() if attr_name.startswith('obj_')]
|
||||||
# constants first, then filters, then methods
|
# constants first, then filters, then methods
|
||||||
filters.sort(key=lambda x: x[1]._creation_counter if hasattr(x[1], '_creation_counter') else (sys.maxint if callable(x[1]) else 0))
|
filters.sort(key=lambda x: x[1]._creation_counter if hasattr(x[1], '_creation_counter') else (sys.maxint if callable(x[1]) else 0))
|
||||||
|
|
||||||
new_class = super(_ItemElementMeta, cls).__new__(cls, name, bases, attrs)
|
new_class = super(_ItemElementMeta, cls).__new__(cls, name, bases, attrs)
|
||||||
new_class._attrs = [f[0] for f in filters]
|
new_class._attrs = _attrs + [f[0] for f in filters]
|
||||||
return new_class
|
return new_class
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -530,7 +535,6 @@ class ItemElement(AbstractElement):
|
||||||
|
|
||||||
class TableElement(ListElement):
|
class TableElement(ListElement):
|
||||||
head_xpath = None
|
head_xpath = None
|
||||||
columns = None
|
|
||||||
cleaner = CleanText
|
cleaner = CleanText
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
@@ -538,9 +542,15 @@ class TableElement(ListElement):
|
||||||
|
|
||||||
self._cols = {}
|
self._cols = {}
|
||||||
|
|
||||||
|
columns = {}
|
||||||
|
for attrname in dir(self):
|
||||||
|
m = re.match('col_(.*)', attrname)
|
||||||
|
if m:
|
||||||
|
columns[m.group(1)] = getattr(self, attrname)
|
||||||
|
|
||||||
for colnum, el in enumerate(self.el.xpath(self.head_xpath)):
|
for colnum, el in enumerate(self.el.xpath(self.head_xpath)):
|
||||||
title = self.cleaner.clean(el)
|
title = self.cleaner.clean(el)
|
||||||
for name, titles in self.columns.iteritems():
|
for name, titles in columns.iteritems():
|
||||||
if title in titles or title == titles:
|
if title in titles or title == titles:
|
||||||
self._cols[name] = colnum
|
self._cols[name] = colnum
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue