rewrite transilien to use lxmlparser (closes #271)
This commit is contained in:
parent
090fb38feb
commit
729b5e9c8f
2 changed files with 38 additions and 124 deletions
|
|
@ -18,115 +18,17 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from datetime import datetime, date, time
|
||||
import HTMLParser
|
||||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
from weboob.tools.misc import to_unicode
|
||||
|
||||
from .pages.route import RoutePage
|
||||
|
||||
class Route(object):
    """One timetable row: mission code, time, destination and platform."""

    def __init__(self, code_mission, time, destination, platform):
        """Store the four fields of a row exactly as given.

        :param code_mission: mission code of the train
        :param time: passage time, kept as received (not parsed here)
        :param destination: destination of the train
        :param platform: platform of the train
        """
        self.code_mission = code_mission
        self.time = time
        self.destination = destination
        self.platform = platform

    def __repr__(self):
        """Return a debug representation listing the four fields in order."""
        return "<Route %s %s %s %s>" % (self.code_mission,
            self.time, self.destination, self.platform)
|
||||
|
||||
class Parser(HTMLParser.HTMLParser):
    """Parse the HTML tables containing the train timetable.

    The parser walks the tag stream and collects, for every cell found after
    a ``<table class="horaires3">`` has been opened, the text of the cells
    whose ``headers`` attribute is ``Code_de_mission``, ``Heure_de_passage``,
    ``Destination`` or ``Voie``.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # Set once the timetable table has been opened; never reset afterwards.
        self.__table_horaires3 = False
        # Kind of the <td> currently being read.
        self.__code_de_mission = False
        self.__time = False
        self.__destination = False
        self.__platform = False
        # True while inside an <a> located in a mission-code cell.
        self.__a_code_de_mission = False
        # Collected texts, one list per column.
        self.__liste_train = []
        self.__liste_horaire = []
        self.__liste_destination = []
        self.__liste_platform = []

    @classmethod
    def parse(cls, data, encoding):
        """Create a parser, feed it ``data.read()`` and return it.

        :param data: object exposing a ``read()`` method
        :param encoding: accepted for interface compatibility; not used here
        """
        parser = cls()
        parser.feed(data.read())
        return parser

    def handle_starttag(self, tag, attrs):
        """Called for each opening tag; updates the state flags."""
        attributes = dict(attrs)

        # .get() rather than ['class']: a <table> without a class attribute
        # must simply be ignored instead of raising KeyError.
        if tag == 'table' and attributes.get('class') == 'horaires3':
            self.__table_horaires3 = True

        elif self.__table_horaires3 and tag == 'td':
            # A cell without a ``headers`` attribute leaves the flags as is.
            if 'headers' in attributes:
                headers = attributes['headers']
                self.__code_de_mission = (headers == 'Code_de_mission')
                self.__time = (headers == 'Heure_de_passage')
                self.__destination = (headers == 'Destination')
                self.__platform = (headers == 'Voie')
        else:
            self.__a_code_de_mission = (tag == 'a' and self.__code_de_mission)

    def handle_data(self, data):
        """Called for each text chunk; stores it in the active column lists."""
        text = data.strip()
        if self.__a_code_de_mission:
            self.__liste_train.append(text)
        # A lone '*' in the time column is not recorded.
        if self.__time and text != '*':
            self.__liste_horaire.append(text)
        if self.__destination:
            self.__liste_destination.append(text)
        if self.__platform:
            self.__liste_platform.append(text)

    def handle_endtag(self, tag):
        """Called for each closing tag; clears the flags that tag ends."""
        if tag == 'a':
            self.__a_code_de_mission = False
        elif tag == 'td':
            # The mission-code cell flag is deliberately not cleared here; it
            # is only reassigned when the next <td headers=...> opens.
            self.__time = False
            self.__destination = False
            self.__platform = False

    @property
    def list_route(self):
        """Return the collected rows as a list of ``Route`` objects.

        The n-th mission code is paired with the n-th time, destination and
        platform. Raises IndexError if one of those lists is shorter than
        the list of mission codes.
        """
        routes = []
        for index, code_mission in enumerate(self.__liste_train):
            routes.append(Route(
                code_mission=code_mission,
                time=self.__liste_horaire[index],
                destination=self.__liste_destination[index],
                platform=self.__liste_platform[index]
            ))
        return routes
|
||||
|
||||
class Transilien(BaseBrowser):
|
||||
DOMAIN = 'www.transilien.com'
|
||||
PAGES = {'https://www\.transilien\.com/web/ITProchainsTrainsAvecDest\.do\?.*': RoutePage,
|
||||
'https://www\.transilien\.com/web/ITProchainsTrains\.do\?.*': RoutePage
|
||||
}
|
||||
PROTOCOL = 'https'
|
||||
USER_AGENT = BaseBrowser.USER_AGENTS['microb']
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
kwargs['parser'] = Parser
|
||||
BaseBrowser.__init__(self, '', **kwargs)
|
||||
PAGES = {'https://www\.transilien\.com/web/ITProchainsTrainsAvecDest\.do\?.*': RoutePage,
|
||||
'https://www\.transilien\.com/web/ITProchainsTrains\.do\?.*': RoutePage,
|
||||
}
|
||||
|
||||
def iter_station_search(self, pattern):
|
||||
pass
|
||||
|
|
@ -136,27 +38,8 @@ class Transilien(BaseBrowser):
|
|||
self.location('https://www.transilien.com/web/ITProchainsTrainsAvecDest.do?codeTr3aDepart=%s&codeTr3aDest=%s&urlModule=/site/pid/184&gareAcc=true' % (station_id, arrival_id))
|
||||
else:
|
||||
self.location('https://www.transilien.com/web/ITProchainsTrains.do?tr3a=%s&urlModule=/site/pid/184' % station_id)
|
||||
for route in self.page.document.list_route:
|
||||
_late_reason = None
|
||||
try :
|
||||
_time = datetime.combine(date.today(), time(*[int(x) for x in route.time.split(':')]))
|
||||
except ValueError:
|
||||
_time = None
|
||||
_late_reason = route.time
|
||||
else:
|
||||
yield {'type': to_unicode(route.code_mission),
|
||||
'time': _time,
|
||||
'departure': to_unicode(station_id),
|
||||
'arrival': to_unicode(route.destination),
|
||||
'late': time(),
|
||||
'late_reason': _late_reason,
|
||||
'plateform': to_unicode(route.platform)}
|
||||
|
||||
def home(self):
|
||||
pass
|
||||
|
||||
def login(self):
|
||||
pass
|
||||
return self.page.iter_routes()
|
||||
|
||||
def is_logged(self):
|
||||
""" Do not need to be logged """
|
||||
|
|
|
|||
|
|
@ -17,9 +17,40 @@
|
|||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import datetime
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.tools.misc import to_unicode
|
||||
from weboob.tools.browser import BasePage, BrokenPageError
|
||||
|
||||
class StationNotFound(Exception):
    """Raised when the timetable table cannot be found in the page."""
    pass
|
||||
|
||||
class RoutePage(BasePage):
    """Page showing the next departures in a ``table.horaires3`` table."""

    def on_loaded(self):
        """Do nothing when the page is loaded."""
        return

    @staticmethod
    def _cell_text(element):
        """Return the stripped leading text of ``element``, or '' if none.

        Assumes ElementTree/lxml-style elements, whose ``text`` is None when
        the element is empty or starts with a child element.
        """
        return (element.text or '').strip()

    def iter_routes(self):
        """Yield one dict per timetable row.

        Each dict has the keys ``type``, ``time``, ``departure``, ``arrival``,
        ``late``, ``late_reason`` and ``plateform``. Only ``<tr>`` rows with
        exactly four ``<td>`` cells are considered.

        Raises StationNotFound if selecting ``table.horaires3`` raises
        BrokenPageError.
        """
        try:
            table = self.parser.select(self.document.getroot(), 'table.horaires3', 1)
        except BrokenPageError:
            raise StationNotFound('Station not found')

        departure = self.parser.select(table, 'td.caption strong', 1).text
        for tr in table.findall('tr'):
            if len(tr.findall('td')) != 4:
                continue

            # _cell_text guards against cells without text (e.g. an empty
            # platform cell), which would otherwise fail on None.strip().
            code_mission = self._cell_text(self.parser.select(tr, 'td[headers=Code_de_mission] a', 1))
            time = self._cell_text(self.parser.select(tr, 'td[headers=Heure_de_passage]', 1))
            destination = self._cell_text(self.parser.select(tr, 'td[headers=Destination]', 1))
            plateform = self._cell_text(self.parser.select(tr, 'td[headers=Voie]', 1))

            try:
                time = datetime.datetime.combine(datetime.date.today(), datetime.time(*[int(x) for x in time.split(':')]))
            except ValueError:
                # NOTE(review): on failure the raw cell text is still yielded
                # as 'time' instead of a datetime -- confirm callers cope.
                self.logger.warning('Unable to parse datetime')

            yield {'type': to_unicode(code_mission),
                   'time': time,
                   'departure': to_unicode(departure),
                   'arrival': to_unicode(destination),
                   'late': datetime.time(),
                   'late_reason': None,
                   'plateform': to_unicode(plateform)}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue