From 735c4600d9485438bdae7665833d0484325a2ff2 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Wed, 18 Dec 2013 21:24:50 +0100 Subject: [PATCH] [vlille] scrape webpage instead of provided xml --- modules/vlille/browser.py | 4 +++- modules/vlille/pages.py | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/vlille/browser.py b/modules/vlille/browser.py index d7fe69ee..0083c2fd 100644 --- a/modules/vlille/browser.py +++ b/modules/vlille/browser.py @@ -32,13 +32,15 @@ class VlilleBrowser(BaseBrowser): ENCODING = None PAGES = { + '%s://%s/les-stations-vlille.aspx' % (PROTOCOL, DOMAIN): ListStationsPage, '%s://%s/xml-station.aspx\?borne=.*' % (PROTOCOL, DOMAIN): InfoStationPage, '%s://%s/xml-stations.aspx' % (PROTOCOL, DOMAIN): ListStationsPage, } def get_station_list(self): if not self.is_on_page(ListStationsPage): - self.location(u'%s://%s/xml-stations.aspx' % (self.PROTOCOL, self.DOMAIN)) + self.location('%s://%s/les-stations-vlille.aspx' % (self.PROTOCOL, self.DOMAIN)) + #self.location(u'%s://%s/xml-stations.aspx' % (self.PROTOCOL, self.DOMAIN)) return self.page.get_station_list() def get_station_infos(self, gauge): diff --git a/modules/vlille/pages.py b/modules/vlille/pages.py index 96dfc74b..225b9cd1 100644 --- a/modules/vlille/pages.py +++ b/modules/vlille/pages.py @@ -97,10 +97,16 @@ class InfoStationPage(BasePage): class ListStationsPage(BasePage): def get_station_list(self): gauges = [] - for marker in self.parser.select(self.document.getroot(), 'marker'): - gauge = Gauge(int(marker.get('id'))) - gauge.name = unicode(marker.get('name')) - gauge.city = u"Lille" - gauge.object = u'vLille' - gauges.append(gauge) + + trs = self.document.getroot().xpath('//table[@id="ctl00_Contenu_ListeStations1_ListViewStations_itemPlaceholderContainer"]/tr') + + for tr in trs: + if not ('id' in tr.attrib): + tds = self.parser.select(tr, 'td/span', method='xpath') + if len(tds) > 4: + gauge = Gauge(int(tds[0].text)) + gauge.name = u'%s' % 
tds[1].text + gauge.city = u'%s' % tds[3].text + gauge.object = u'vLille' + gauges.append(gauge) return gauges