rewrite transilien to use lxmlparser (closes #271)
This commit is contained in:
parent
090fb38feb
commit
729b5e9c8f
2 changed files with 38 additions and 124 deletions
|
|
@ -18,115 +18,17 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
from datetime import datetime, date, time
|
|
||||||
import HTMLParser
|
|
||||||
|
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.tools.browser import BaseBrowser
|
||||||
from weboob.tools.misc import to_unicode
|
|
||||||
|
|
||||||
from .pages.route import RoutePage
|
from .pages.route import RoutePage
|
||||||
|
|
||||||
class Route(object):
    """One timetable row: mission code, time, destination and platform."""

    def __init__(self, code_mission, time, destination, platform):
        # Values are stored exactly as received; no conversion is done here.
        self.code_mission, self.time = code_mission, time
        self.destination, self.platform = destination, platform

    def __repr__(self):
        fields = (self.code_mission, self.time,
                  self.destination, self.platform)
        return "<Route %s %s %s %s>" % fields
|
|
||||||
|
|
||||||
class Parser(HTMLParser.HTMLParser):
    """Parse the HTML tables that contain the timetable.

    Only cells found after a ``<table class="horaires3">`` has been opened
    are inspected.  The text of each relevant cell is accumulated in one
    list per column; ``list_route`` then assembles those lists into
    ``Route`` objects.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # State flags describing where the parser currently is.
        self.__table_horaires3 = False
        self.__code_de_mission = False
        self.__a_code_de_mission = False
        self.__time = False
        self.__destination = False
        self.__platform = False
        # One accumulator per column of the timetable.
        self.__liste_train = []
        self.__liste_horaire = []
        self.__liste_destination = []
        self.__liste_platform = []

    @classmethod
    def parse(cls, data, encoding):
        """Build a parser, feed it everything ``data.read()`` returns and return it.

        ``encoding`` is accepted for interface compatibility but is not used.
        """
        parser = cls()
        parser.feed(data.read())
        return parser

    def handle_starttag(self, tag, attrs):
        """Run on every opening tag and update the state flags."""
        attributes = dict(attrs)
        # .get() rather than [...]: a <table> without a class attribute must
        # be ignored, not abort the whole parse with a KeyError.
        if tag == 'table' and attributes.get('class') == 'horaires3':
            self.__table_horaires3 = True
        elif self.__table_horaires3 and tag == 'td':
            # A cell without a 'headers' attribute leaves the flags untouched.
            if 'headers' in attributes:
                headers = attributes['headers']
                self.__code_de_mission = (headers == 'Code_de_mission')
                self.__time = (headers == 'Heure_de_passage')
                self.__destination = (headers == 'Destination')
                self.__platform = (headers == 'Voie')
        else:
            # The mission code is the text of an <a> inside its cell.
            self.__a_code_de_mission = (tag == 'a' and self.__code_de_mission)

    def handle_data(self, data):
        """Run on every text node and store it in the matching column list."""
        text = data.strip()
        if self.__a_code_de_mission:
            self.__liste_train.append(text)
        # A lone '*' in the time column is skipped.
        if self.__time and text != '*':
            self.__liste_horaire.append(text)
        if self.__destination:
            self.__liste_destination.append(text)
        if self.__platform:
            self.__liste_platform.append(text)

    def handle_endtag(self, tag):
        """Run on every closing tag and clear the flags that tag terminates."""
        if tag == 'a':
            self.__a_code_de_mission = False
        elif tag == 'td':
            self.__time = False
            self.__destination = False
            self.__platform = False

    @property
    def list_route(self):
        """Return the collected rows as a list of ``Route`` objects.

        The n-th mission code is paired with the n-th entry of each other
        column; an IndexError is raised if a column has fewer entries than
        there are mission codes.
        """
        routes = []
        for index, code_mission in enumerate(self.__liste_train):
            routes.append(Route(
                code_mission=code_mission,
                time=self.__liste_horaire[index],
                destination=self.__liste_destination[index],
                platform=self.__liste_platform[index]
            ))
        return routes
|
|
||||||
|
|
||||||
class Transilien(BaseBrowser):
|
class Transilien(BaseBrowser):
|
||||||
DOMAIN = 'www.transilien.com'
|
DOMAIN = 'www.transilien.com'
|
||||||
PAGES = {'https://www\.transilien\.com/web/ITProchainsTrainsAvecDest\.do\?.*': RoutePage,
|
|
||||||
'https://www\.transilien\.com/web/ITProchainsTrains\.do\?.*': RoutePage
|
|
||||||
}
|
|
||||||
PROTOCOL = 'https'
|
PROTOCOL = 'https'
|
||||||
USER_AGENT = BaseBrowser.USER_AGENTS['microb']
|
USER_AGENT = BaseBrowser.USER_AGENTS['microb']
|
||||||
|
PAGES = {'https://www\.transilien\.com/web/ITProchainsTrainsAvecDest\.do\?.*': RoutePage,
|
||||||
def __init__(self, **kwargs):
|
'https://www\.transilien\.com/web/ITProchainsTrains\.do\?.*': RoutePage,
|
||||||
kwargs['parser'] = Parser
|
}
|
||||||
BaseBrowser.__init__(self, '', **kwargs)
|
|
||||||
|
|
||||||
def iter_station_search(self, pattern):
|
def iter_station_search(self, pattern):
|
||||||
pass
|
pass
|
||||||
|
|
@ -136,27 +38,8 @@ class Transilien(BaseBrowser):
|
||||||
self.location('https://www.transilien.com/web/ITProchainsTrainsAvecDest.do?codeTr3aDepart=%s&codeTr3aDest=%s&urlModule=/site/pid/184&gareAcc=true' % (station_id, arrival_id))
|
self.location('https://www.transilien.com/web/ITProchainsTrainsAvecDest.do?codeTr3aDepart=%s&codeTr3aDest=%s&urlModule=/site/pid/184&gareAcc=true' % (station_id, arrival_id))
|
||||||
else:
|
else:
|
||||||
self.location('https://www.transilien.com/web/ITProchainsTrains.do?tr3a=%s&urlModule=/site/pid/184' % station_id)
|
self.location('https://www.transilien.com/web/ITProchainsTrains.do?tr3a=%s&urlModule=/site/pid/184' % station_id)
|
||||||
for route in self.page.document.list_route:
|
|
||||||
_late_reason = None
|
|
||||||
try :
|
|
||||||
_time = datetime.combine(date.today(), time(*[int(x) for x in route.time.split(':')]))
|
|
||||||
except ValueError:
|
|
||||||
_time = None
|
|
||||||
_late_reason = route.time
|
|
||||||
else:
|
|
||||||
yield {'type': to_unicode(route.code_mission),
|
|
||||||
'time': _time,
|
|
||||||
'departure': to_unicode(station_id),
|
|
||||||
'arrival': to_unicode(route.destination),
|
|
||||||
'late': time(),
|
|
||||||
'late_reason': _late_reason,
|
|
||||||
'plateform': to_unicode(route.platform)}
|
|
||||||
|
|
||||||
def home(self):
|
return self.page.iter_routes()
|
||||||
pass
|
|
||||||
|
|
||||||
def login(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def is_logged(self):
|
def is_logged(self):
|
||||||
""" Do not need to be logged """
|
""" Do not need to be logged """
|
||||||
|
|
|
||||||
|
|
@ -17,9 +17,40 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.misc import to_unicode
|
||||||
|
from weboob.tools.browser import BasePage, BrokenPageError
|
||||||
|
|
||||||
|
class StationNotFound(Exception):
    """Raised by RoutePage.iter_routes when the page has no 'table.horaires3'."""
|
||||||
|
|
||||||
class RoutePage(BasePage):
    """Page listing the next trains leaving a station."""

    def on_loaded(self):
        """Do nothing when the page is loaded."""
        return

    def _cell_text(self, row, selector):
        """Return the stripped text of the single node matching *selector* in *row*.

        lxml reports ``.text`` as ``None`` for an element that has no leading
        text (an empty cell, for instance), so an empty string is substituted
        instead of failing on ``None.strip()``.
        """
        node = self.parser.select(row, selector, 1)
        return (node.text or '').strip()

    def iter_routes(self):
        """Yield one dict per timetable row of the 'table.horaires3' table.

        Each dict has the keys 'type', 'time', 'departure', 'arrival',
        'late', 'late_reason' and 'plateform'.  'time' is a datetime on
        today's date when the cell can be parsed as colon-separated integers;
        otherwise a warning is logged and the raw cell text is yielded as is.

        Raises StationNotFound when the table cannot be selected.
        """
        try:
            table = self.parser.select(self.document.getroot(), 'table.horaires3', 1)
        except BrokenPageError:
            raise StationNotFound('Station not found')

        departure = self.parser.select(table, 'td.caption strong', 1).text
        for tr in table.findall('tr'):
            # Only rows with exactly four cells are timetable entries.
            if len(tr.findall('td')) != 4:
                continue

            code_mission = self._cell_text(tr, 'td[headers=Code_de_mission] a')
            time = self._cell_text(tr, 'td[headers=Heure_de_passage]')
            destination = self._cell_text(tr, 'td[headers=Destination]')
            plateform = self._cell_text(tr, 'td[headers=Voie]')

            try:
                time = datetime.datetime.combine(
                    datetime.date.today(),
                    datetime.time(*[int(x) for x in time.split(':')]))
            except ValueError:
                # Keep the raw text in 'time' so the row is still reported.
                self.logger.warning('Unable to parse datetime')

            yield {'type': to_unicode(code_mission),
                   'time': time,
                   'departure': to_unicode(departure),
                   'arrival': to_unicode(destination),
                   'late': datetime.time(),
                   'late_reason': None,
                   'plateform': to_unicode(plateform)}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue