rewrite transilien to use lxmlparser (closes #271)
This commit is contained in:
parent
090fb38feb
commit
729b5e9c8f
2 changed files with 38 additions and 124 deletions
|
|
@ -17,9 +17,40 @@
|
|||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import datetime
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.tools.misc import to_unicode
|
||||
from weboob.tools.browser import BasePage, BrokenPageError
|
||||
|
||||
class StationNotFound(Exception):
    """Raised when the timetable table cannot be located on the page."""
|
||||
|
||||
class RoutePage(BasePage):
    """Page holding the ``table.horaires3`` timetable of a station."""

    def on_loaded(self):
        # Nothing to prepare once the page is loaded.
        return

    @staticmethod
    def _cell_text(element):
        """Return the stripped leading text of *element*, or '' if it has none."""
        # lxml sets ``.text`` to None for an element without leading text
        # (e.g. an empty cell); calling ``.strip()`` on it would raise
        # AttributeError and abort the whole iteration.
        return (element.text or '').strip()

    def iter_routes(self):
        """Iterate over the routes listed in the timetable.

        Yields one dict per table row that has exactly four ``td`` cells,
        with the keys ``type``, ``time``, ``departure``, ``arrival``,
        ``late``, ``late_reason`` and ``plateform``.

        ``time`` is a ``datetime.datetime`` built from today's date and the
        parsed passing time; when the cell cannot be parsed, a warning is
        logged and the raw cell text is yielded instead.

        Raises StationNotFound when selecting ``table.horaires3`` raises
        BrokenPageError.
        """
        try:
            table = self.parser.select(self.document.getroot(), 'table.horaires3', 1)
        except BrokenPageError:
            raise StationNotFound('Station not found')

        departure = self.parser.select(table, 'td.caption strong', 1).text
        for tr in table.findall('tr'):
            # Only rows with exactly four cells are processed; any other
            # row is skipped.
            if len(tr.findall('td')) != 4:
                continue

            code_mission = self._cell_text(
                self.parser.select(tr, 'td[headers=Code_de_mission] a', 1))
            raw_time = self._cell_text(
                self.parser.select(tr, 'td[headers=Heure_de_passage]', 1))
            destination = self._cell_text(
                self.parser.select(tr, 'td[headers=Destination]', 1))
            plateform = self._cell_text(
                self.parser.select(tr, 'td[headers=Voie]', 1))

            # Best effort: fall back to the raw text when it is not a
            # colon-separated list of integers accepted by datetime.time.
            when = raw_time
            try:
                parts = [int(x) for x in raw_time.split(':')]
                when = datetime.datetime.combine(datetime.date.today(),
                                                 datetime.time(*parts))
            except ValueError:
                self.logger.warning('Unable to parse datetime')

            yield {'type': to_unicode(code_mission),
                   'time': when,
                   'departure': to_unicode(departure),
                   'arrival': to_unicode(destination),
                   'late': datetime.time(),
                   'late_reason': None,
                   'plateform': to_unicode(plateform)}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue