[vlille] scrape webpage instead of provided XML

This commit is contained in:
Bezleputh 2013-12-18 21:24:50 +01:00 committed by Florent Fourcot
commit 735c4600d9
2 changed files with 15 additions and 7 deletions

View file

@ -32,13 +32,15 @@ class VlilleBrowser(BaseBrowser):
ENCODING = None
# Maps URL patterns to the page class used for the matching document.
# The keys are built from PROTOCOL and DOMAIN; the second one is written as a
# regular expression (escaped `?`, trailing `.*`).
# NOTE(review): presumably the base browser matches the visited URL against
# these keys to pick the page class -- confirm against BaseBrowser.
# Both the HTML listing and the XML feed resolve to ListStationsPage.
PAGES = {
'%s://%s/les-stations-vlille.aspx' % (PROTOCOL, DOMAIN): ListStationsPage,
'%s://%s/xml-station.aspx\?borne=.*' % (PROTOCOL, DOMAIN): InfoStationPage,
'%s://%s/xml-stations.aspx' % (PROTOCOL, DOMAIN): ListStationsPage,
}
def get_station_list(self):
    """Return the station list produced by the current ListStationsPage.

    If the browser is not already on a ListStationsPage, it first navigates
    to the HTML listing (``les-stations-vlille.aspx``), then delegates to the
    page's own ``get_station_list()``.
    """
    if not self.is_on_page(ListStationsPage):
        # One request only: the HTML listing is the document that gets parsed.
        self.location('%s://%s/les-stations-vlille.aspx' % (self.PROTOCOL, self.DOMAIN))
    return self.page.get_station_list()
def get_station_infos(self, gauge):

View file

@ -97,10 +97,16 @@ class InfoStationPage(BasePage):
class ListStationsPage(BasePage):
    """Station listing page; turns each listed station into a Gauge."""

    def get_station_list(self):
        """Build and return the list of Gauge objects found in the document.

        Two layouts are read, in this order:

        * ``marker`` elements, using their ``id`` and ``name`` attributes
          (the city is fixed to "Lille");
        * rows of the stations table that carry no ``id`` attribute and hold
          more than four ``td/span`` cells; cells 0, 1 and 3 supply the
          gauge id, name and city.
        """
        root = self.document.getroot()
        stations = []

        # NOTE(review): the marker loop looks like the XML-feed path; confirm
        # it is still needed now that the HTML listing is the page loaded.
        for node in self.parser.select(root, 'marker'):
            station = Gauge(int(node.get('id')))
            station.name = unicode(node.get('name'))
            station.city = u"Lille"
            station.object = u'vLille'
            stations.append(station)

        rows = root.xpath('//table[@id="ctl00_Contenu_ListeStations1_ListViewStations_itemPlaceholderContainer"]/tr')
        for row in rows:
            # Rows that have an id attribute are not station entries.
            if 'id' in row.attrib:
                continue
            cells = self.parser.select(row, 'td/span', method='xpath')
            if len(cells) <= 4:
                continue
            station = Gauge(int(cells[0].text))
            station.name = u'%s' % cells[1].text
            station.city = u'%s' % cells[3].text
            station.object = u'vLille'
            stations.append(station)

        return stations