lcl: website change, rewrite with browser2

2014-12-03 23:22:29 +01:00 · 2014-12-03 23:22:29 +01:00 · 446bb3416c
commit 446bb3416c
parent 321f6f8633
4 changed files with 157 additions and 320 deletions
--- a/modules/lcl/pages.py
+++ b/modules/lcl/pages.py
@@ -20,15 +20,20 @@
 import re
 import base64
 from decimal import Decimal
-from logging import error
 import math
 import random
+from cStringIO import StringIO


 from weboob.capabilities.bank import Account
-from weboob.deprecated.browser import Page, BrowserUnavailable
-from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
+from weboob.browser.elements import method, ListElement, ItemElement, SkipItem
+from weboob.exceptions import ParseError
+from weboob.browser.pages import LoggedPage, HTMLPage, FormNotFound
+from weboob.browser.filters.standard import CleanText, Field, Regexp, Format, \
+                                            CleanDecimal, Map
+from weboob.exceptions import BrowserUnavailable
 from weboob.tools.capabilities.bank.transactions import FrenchTransaction
+from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError


 class LCLVirtKeyboard(MappedVirtKeyboard):
@@ -48,169 +53,140 @@ class LCLVirtKeyboard(MappedVirtKeyboard):

    color=(255,255,255,255)

-    def __init__(self,basepage):
-        img=basepage.document.find("//img[@id='idImageClavier']")
+    def __init__(self, basepage):
+        img=basepage.doc.find("//img[@id='idImageClavier']")
        random.seed()
-        self.url+="%s"%str(long(math.floor(long(random.random()*1000000000000000000000))))
-        MappedVirtKeyboard.__init__(self,basepage.browser.openurl(self.url),
-                                    basepage.document,img,self.color,"id")
+        self.url += "%s"%str(long(math.floor(long(random.random()*1000000000000000000000))))
+        super(LCLVirtKeyboard, self).__init__(StringIO(basepage.browser.open(self.url).content), basepage.doc,img,self.color, "id")
        self.check_symbols(self.symbols,basepage.browser.responses_dirname)

-    def get_symbol_code(self,md5sum):
-        code=MappedVirtKeyboard.get_symbol_code(self,md5sum)
+    def get_symbol_code(self, md5sum):
+        code=MappedVirtKeyboard.get_symbol_code(self, md5sum)
        return code[-2:]

-    def get_string_code(self,string):
+    def get_string_code(self, string):
        code=''
        for c in string:
-            code+=self.get_symbol_code(self.symbols[c])
+            code += self.get_symbol_code(self.symbols[c])
        return code


-class SkipPage(Page):
-    pass
-
-
-class LoginPage(Page):
-    def on_loaded(self):
+class LoginPage(HTMLPage):
+    def on_load(self):
        try:
-            self.browser.select_form(name='form')
-        except:
-            try:
-                self.browser.select_form(predicate=lambda x: x.attrs.get('id','')=='setInfosCGS')
-            except:
-                return
+            form = self.get_form(xpath='//form[@id="setInfosCGS" or @name="form"]')
+        except FormNotFound:
+            return

-        self.browser.submit(nologin=True)
+        form.submit()

    def myXOR(self,value,seed):
-        s=''
+        s = ''
        for i in xrange(len(value)):
-            s+=chr(seed^ord(value[i]))
+            s += chr(seed^ord(value[i]))
        return s

    def login(self, login, passwd):
        try:
-            vk=LCLVirtKeyboard(self)
+            vk = LCLVirtKeyboard(self)
        except VirtKeyboardError as err:
-            error("Error: %s"%err)
+            self.logger.exception(err)
            return False

-        password=vk.get_string_code(passwd)
+        password = vk.get_string_code(passwd)

-        seed=-1
-        str="var aleatoire = "
-        for script in self.document.findall("//script"):
-            if(script.text is None or len(script.text)==0):
+        seed = -1
+        s = "var aleatoire = "
+        for script in self.doc.findall("//script"):
+            if script.text is None or len(script.text) == 0:
                continue
-            offset=script.text.find(str)
-            if offset!=-1:
-                seed=int(script.text[offset+len(str)+1:offset+len(str)+2])
+            offset = script.text.find(s)
+            if offset != -1:
+                seed = int(script.text[offset+len(s)+1:offset+len(s)+2])
                break
        if seed==-1:
-            error("Variable 'aleatoire' not found")
-            return False
+            raise ParseError("Variable 'aleatoire' not found")

-        self.browser.select_form(
-            predicate=lambda x: x.attrs.get('id','')=='formAuthenticate')
-        self.browser.form.set_all_readonly(False)
-        self.browser['identifiant'] = login.encode('utf-8')
-        self.browser['postClavierXor'] = base64.b64encode(self.myXOR(password,seed))
+        form = self.get_form('//form[@id="formAuthenticate"]')
+        form['identifiant'] = login
+        form['postClavierXor'] = base64.b64encode(self.myXOR(password,seed))
        try:
-            self.browser['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING
+            form['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING
        except AttributeError:
            pass

        try:
-            self.browser.submit(nologin=True)
+            form.submit()
        except BrowserUnavailable:
            # Login is not valid
            return False
        return True

    def is_error(self):
-        errors = self.document.xpath(u'//div[@class="erreur" or @class="messError"]')
+        errors = self.doc.xpath(u'//div[@class="erreur" or @class="messError"]')
        return len(errors) > 0


-class ContractsPage(Page):
-    def on_loaded(self):
+class ContractsPage(LoggedPage, HTMLPage):
+    def on_load(self):
        self.select_contract()

    def select_contract(self):
        # XXX We select automatically the default contract in list. We should let user
        # ask what contract he wants to see, or display accounts for all contracts.
-        self.browser.select_form(nr=0)
-        self.browser.submit(nologin=True)
+        form = self.get_form(nr=0)
+        form.submit()


-class AccountsPage(Page):
-    def on_loaded(self):
-        warn = self.document.xpath('//div[@id="attTxt"]')
+class AccountsPage(LoggedPage, HTMLPage):
+    def on_load(self):
+        warn = self.doc.xpath('//div[@id="attTxt"]')
        if len(warn) > 0:
            raise BrowserUnavailable(warn[0].text)

-    def get_list(self):
-        l = []
-        ids = set()
-        for a in self.document.getiterator('a'):
-            link=a.attrib.get('href')
-            if link is None:
-                continue
-            if link.startswith("/outil/UWLM/ListeMouvements"):
-                account = Account()
-                #by default the website propose the last 7 days or last 45 days but we can force to have the last 55days
-                account._link_id=link+"&mode=55"
-                account._coming_links = []
-                parameters=link.split("?").pop().split("&")
-                for parameter in parameters:
-                    list=parameter.split("=")
-                    value=list.pop()
-                    name=list.pop()
-                    if name=="agence":
-                        account.id=value
-                    elif name=="compte":
-                        account.id+=value
-                    elif name=="nature":
-                        # TODO parse this string to get the right Account.TYPE_* to
-                        # store in account.type.
-                        account._type=value
+    @method
+    class get_list(ListElement):
+        item_xpath = '//tr[contains(@onclick, "redirect")]'
+        flush_at_end = True

-                if account.id in ids:
-                    continue
+        class account(ItemElement):
+            klass = Account

-                ids.add(account.id)
-                div = a.getparent().getprevious()
-                if not div.text.strip():
-                    div = div.find('div')
-                account.label=u''+div.text.strip()
-                balance = FrenchTransaction.clean_amount(a.text)
-                if '-' in balance:
-                    balance='-'+balance.replace('-', '')
-                account.balance=Decimal(balance)
-                account.currency = account.get_currency(a.text)
-                self.logger.debug('%s Type: %s' % (account.label, account._type))
-                l.append(account)
-            if link.startswith('/outil/UWCB/UWCBEncours'):
-                if len(l) == 0:
-                    self.logger.warning('There is a card account but not any check account')
-                    continue
+            def condition(self):
+                return '/outil/UWLM/ListeMouvement' in self.el.attrib['onclick']

-                account = l[-1]
+            NATURE2TYPE = {'006': Account.TYPE_CHECKING,
+                           '049': Account.TYPE_SAVINGS,
+                           '068': Account.TYPE_MARKET,
+                           '069': Account.TYPE_SAVINGS,
+                          }

-                coming = FrenchTransaction.clean_amount(a.text)
-                if '-' in coming:
-                    coming = '-'+coming.replace('-', '')
+            obj__link_id = Format('%s&mode=55', Regexp(CleanText('./@onclick'), "'(.*)'"))
+            obj_id = Regexp(Field('_link_id'), r'.*agence=(\w+).*compte=(\w+)', r'\1\2')
+            obj__coming_links = []
+            obj_label = CleanText('.//div[@class="libelleCompte"]')
+            obj_balance = CleanDecimal('.//td[has-class("right")]', replace_dots=True)
+            obj_currency = FrenchTransaction.Currency('.//td[has-class("right")]')
+            obj_type = Map(Regexp(Field('_link_id'), r'.*nature=(\w+)'), NATURE2TYPE, default=Account.TYPE_UNKNOWN)
+
+        class card(ItemElement):
+            def condition(self):
+                return '/outil/UWCB/UWCBEncours' in self.el.attrib['onclick']
+
+            def parse(self, el):
+                link = Regexp(CleanText('./@onclick'), "'(.*)'")(el)
+                id = Regexp(CleanText('./@onclick'), r'.*AGENCE=(\w+).*COMPTE=(\w+).*CLE=(\w+)', r'\1\2\3')(el)
+
+                account = self.parent.objects[id]
                if not account.coming:
                    account.coming = Decimal('0')
-                account.coming += Decimal(coming)
+
+                account.coming += CleanDecimal('.//td[has-class("right")]', replace_dots=True)(el)
                account._coming_links.append(link)
-
-        return l
-
+                raise SkipItem()

 class Transaction(FrenchTransaction):
-    PATTERNS = [(re.compile('^(?P<category>CB)  (?P<text>RETRAIT) DU  (?P<dd>\d+)/(?P<mm>\d+)'),
+    PATTERNS = [(re.compile('^(?P<category>CB) (?P<text>RETRAIT) DU (?P<dd>\d+)/(?P<mm>\d+)'),
                                                            FrenchTransaction.TYPE_WITHDRAWAL),
                (re.compile('^(?P<category>(PRLV|PE)) (?P<text>.*)'),
                                                            FrenchTransaction.TYPE_ORDER),
@@ -235,103 +211,26 @@ class Transaction(FrenchTransaction):
               ]


-class AccountHistoryPage(Page):
-    def get_table(self):
-        tables=self.document.findall("//table[@class='tagTab pyjama']")
-        for table in tables:
-            # Look for the relevant table in the Pro version
-            header=table.getprevious()
-            while header is not None and str(header.tag) != 'div':
-                header=header.getprevious()
-            if header is not None:
-                header=header.find("div")
-            if header is not None:
-                header=header.find("span")
+class AccountHistoryPage(LoggedPage, HTMLPage):
+    @method
+    class _get_operations(Transaction.TransactionsElement):
+        item_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr'
+        head_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr/th'

-            if header is not None and \
-               header.text.strip().startswith("Opérations effectuées".decode('utf-8')):
-                return table
+        col_raw = [u'Vos opérations', u'Libellé']

-            # Look for the relevant table in the Particulier version
-            header=table.find("thead").find("tr").find("th[@class='titleTab titleTableft']")
-            if header is not None and\
-               header.text.strip().startswith("Solde au"):
-                return table
+        class item(Transaction.TransactionElement):
+            def condition(self):
+                return self.parent.get_colnum('date') is not None and len(self.el.findall('td')) >= 3

-    def strip_label(self, s):
-        return s
+            def validate(self, obj):
+                return obj.category != 'RELEVE CB'

    def get_operations(self):
-        table = self.get_table()
-        operations = []
-
-        if table is None:
-            return operations
-
-        for tr in table.iter('tr'):
-            # skip headers and empty rows
-            if len(tr.findall("th"))!=0 or\
-               len(tr.findall("td"))<=1:
-                continue
-            mntColumn = 0
-
-            date = None
-            raw = None
-            credit = ''
-            debit = ''
-            for td in tr.iter('td'):
-                value = td.attrib.get('id')
-                if value is None:
-                    # if tag has no id nor class, assume it's a label
-                    value = td.attrib.get('class', 'opLib')
-
-                if value.startswith("date") or value.endswith('center'):
-                    # some transaction are included in a <strong> tag
-                    date = u''.join([txt.strip() for txt in td.itertext()])
-                elif value.startswith("lib") or value.startswith("opLib"):
-                    # misclosed A tag requires to grab text from td
-                    tooltip = td.xpath('./div[@class="autoTooltip"]')
-                    if len(tooltip) > 0:
-                        td.remove(tooltip[0])
-                    raw = self.parser.tocleanstring(td)
-                elif value.startswith("solde") or value.startswith("mnt") or \
-                     value.startswith('debit') or value.startswith('credit'):
-                    mntColumn += 1
-                    amount = u''.join([txt.strip() for txt in td.itertext()])
-                    if amount != "":
-                        if value.startswith("soldeDeb") or value.startswith('debit') or mntColumn==1:
-                            debit = amount
-                        else:
-                            credit = amount
-
-            if date is None:
-                # skip non-transaction
-                continue
-
-            operation = Transaction(len(operations))
-            operation.parse(date, raw)
-            operation.set_amount(credit, debit)
-
-            if operation.category == 'RELEVE CB':
-                # strip that transaction which is detailled in CBListPage.
-                continue
-
-            operations.append(operation)
-        return operations
+        return self._get_operations()


 class CBHistoryPage(AccountHistoryPage):
-    def get_table(self):
-        # there is only one table on the page
-        try:
-            return self.document.findall("//table[@class='tagTab pyjama']")[0]
-        except IndexError:
-            return None
-
-    def strip_label(self, label):
-        # prevent to be considered as a category if there are two spaces.
-        return re.sub(r'[ ]+', ' ', label).strip()
-
    def get_operations(self):
        for tr in AccountHistoryPage.get_operations(self):
            tr.type = tr.TYPE_CARD
@@ -341,8 +240,8 @@ class CBHistoryPage(AccountHistoryPage):
 class CBListPage(CBHistoryPage):
    def get_cards(self):
        cards = []
-        for a in self.document.getiterator('a'):
-            link = a.attrib.get('href', '')
-            if link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link:
+        for tr in self.doc.getiterator('tr'):
+            link = Regexp(CleanText('./@onclick'), "'(.*)'", default=None)(tr)
+            if link is not None and link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link:
                cards.append(link)
        return cards