[vlille] scrape webpage instead of provided xml
This commit is contained in:
parent
3b21f63936
commit
735c4600d9
2 changed files with 15 additions and 7 deletions
|
|
@ -32,13 +32,15 @@ class VlilleBrowser(BaseBrowser):
|
|||
ENCODING = None

# Maps URL patterns to the page class handling them. The keys are regular
# expressions (note the ``\?`` and ``.*`` below), so they are written as raw
# strings and the literal dots of the file names are escaped.
# NOTE(review): the XML feed URL is still routed to ListStationsPage even
# though the station list is now fetched from the HTML page -- confirm that
# this entry is still wanted.
PAGES = {
    r'%s://%s/les-stations-vlille\.aspx' % (PROTOCOL, DOMAIN): ListStationsPage,
    r'%s://%s/xml-station\.aspx\?borne=.*' % (PROTOCOL, DOMAIN): InfoStationPage,
    r'%s://%s/xml-stations\.aspx' % (PROTOCOL, DOMAIN): ListStationsPage,
}
|
||||
|
||||
def get_station_list(self):
    """Return the stations listed on the vLille stations web page.

    The browser is first sent to ``les-stations-vlille.aspx`` unless it is
    already on a ``ListStationsPage``; the result is whatever the current
    page's ``get_station_list()`` returns.
    """
    if not self.is_on_page(ListStationsPage):
        # The HTML page is scraped instead of the xml-stations.aspx feed.
        self.location('%s://%s/les-stations-vlille.aspx' % (self.PROTOCOL, self.DOMAIN))
    return self.page.get_station_list()
|
||||
|
||||
def get_station_infos(self, gauge):
|
||||
|
|
|
|||
|
|
@ -97,10 +97,16 @@ class InfoStationPage(BasePage):
|
|||
class ListStationsPage(BasePage):
    """Page listing the vLille stations as rows of an HTML table."""

    def get_station_list(self):
        """Build one ``Gauge`` per station row of the stations table.

        Only rows without an ``id`` attribute that hold more than four
        ``td/span`` cells are used. The first cell is read as the numeric
        station id, the second as the station name and the fourth as the
        city.

        :returns: list of ``Gauge`` objects; empty when no row matches
        """
        gauges = []
        rows = self.document.getroot().xpath(
            '//table[@id="ctl00_Contenu_ListeStations1_ListViewStations_itemPlaceholderContainer"]/tr')

        for tr in rows:
            # Rows carrying an ``id`` attribute are not station rows
            # (presumably layout/placeholder rows -- TODO confirm).
            if 'id' in tr.attrib:
                continue

            cells = self.parser.select(tr, 'td/span', method='xpath')
            if len(cells) <= 4:
                continue

            try:
                station_id = int(cells[0].text)
            except (TypeError, ValueError):
                # Empty or non-numeric first cell: skip the row instead of
                # aborting the whole listing.
                continue

            gauge = Gauge(station_id)
            # ``text`` may be None for an empty element; fall back to an empty
            # string rather than storing the literal u'None'.
            gauge.name = u'%s' % (cells[1].text or u'')
            gauge.city = u'%s' % (cells[3].text or u'')
            gauge.object = u'vLille'
            gauges.append(gauge)

        return gauges
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue