CrAgr: trivia: reordered methods within the AccountsList class.

2012-06-10 18:26:45 +02:00 · 2012-06-10 18:26:45 +02:00 · 63dbc13322
commit 63dbc13322
parent ca4240cb19
1 changed files with 101 additions and 101 deletions
--- a/modules/cragr/pages/accounts_list.py
+++ b/modules/cragr/pages/accounts_list.py
@ -35,6 +35,37 @@ class AccountsList(CragrBasePage):
        based on its URL.
    """
    def is_accounts_list(self):
        """
            Tells whether the current page looks like the one listing the
            accounts.

            The page is recognised by the presence of at least one link whose
            text contains "comptes titres".
        """
        matching_links = self.document.xpath('/html/body//a[contains(text(), "comptes titres")]')
        if matching_links:
            return True
        return False
    def is_account_page(self):
        """
            Tells whether the current page looks like the one listing the
            history of a specific account.

            Every <span> found directly inside a <div class="dv"> is inspected:
            its text is stripped, newlines are removed, and the page qualifies
            as soon as one of them matches "Compte ... n ... <digits>"
            (case-insensitive).
        """
        # tested on CA Lorraine, Paris, Toulouse
        candidate_spans = self.document.xpath('/html/body//div[@class="dv"]/span')
        return any(
            re.match('.*Compte.*n.*[0-9]+',
                     span.text_content().strip().replace("\n", ''),
                     flags=re.IGNORECASE)
            for span in candidate_spans
        )
    def is_transfer_page(self):
        """
            Tells whether the current page looks like the one used to order
            transfers between accounts.

            Such a page must hold, inside a form, both a <select> named
            "numCompteEmetteur" and a <select> named "numCompteBeneficiaire".
        """
        # both queries are always run, in this order
        emitter_selects = self.document.xpath('/html/body//form//select[@name="numCompteEmetteur"]')
        beneficiary_selects = self.document.xpath('/html/body//form//select[@name="numCompteBeneficiaire"]')
        if not emitter_selects:
            return False
        return bool(beneficiary_selects)
    def get_list(self):
        """
            Returns the list of available bank accounts
@ -81,36 +112,70 @@ class AccountsList(CragrBasePage):
                    l.append(account)
        return l
-    def is_accounts_list(self):
+    def get_history(self, start_index=0, start_offset=0):
        """
-            Returns True if the current page appears to be the page dedicated to
+            Returns the history of a specific account. Note that this function
-            list the accounts.
+            expects the current page to be the one dedicated to this history.
-        """
+            start_index is the id used for the first created operation.
-        # we check for the presence of a "mes comptes titres" link_id
+            start_offset allows ignoring the `n' first Transactions on the page.
        link = self.document.xpath('/html/body//a[contains(text(), "comptes titres")]')
        return bool(link)
    def is_account_page(self):
        """
            Returns True if the current page appears to be a page dedicated to list
            the history of a specific account.
        """
        # tested on CA Lorraine, Paris, Toulouse
-        title_spans = self.document.xpath('/html/body//div[@class="dv"]/span')
+        # avoid parsing the page as an account-dedicated page if it is not the case
-        for title_span in title_spans:
+        if not self.is_account_page():
-            title_text = title_span.text_content().strip().replace("\n", '')
+            return
            if (re.match('.*Compte.*n.*[0-9]+', title_text, flags=re.IGNORECASE)):
                return True
        return False
-    def is_transfer_page(self):
+        # Step 1: extract text tokens
-        """
+        tokens = []
-            Returns True if the current page appears to be the page dedicated to
+        token_extractor = TokenExtractor()
-            order transfers between accounts.
+        for div in self.document.getiterator('div'):
-        """
+            if div.attrib.get('class', '') in ('dv'):
-        source_account_select_field = self.document.xpath('/html/body//form//select[@name="numCompteEmetteur"]')
+                self.logger.debug("Analyzing div %s" % div)
-        target_account_select_field  = self.document.xpath('/html/body//form//select[@name="numCompteBeneficiaire"]')
+                for token in token_extractor.extract_tokens(div):
-        return bool(source_account_select_field) and bool(target_account_select_field)
+                    self.logger.debug('Extracted text token: "%s"' % token)
                    tokens.append(token)
        # Step 2: convert tokens into operations
        # Notes:
        # * the code below expects pieces of information to be in the date-label-amount order;
        #   could we achieve a heuristic smart enough to guess this order?
        # * unlike the former code, we parse every operation
        operations = []
        current_operation = {}
        for token in tokens:
            self.logger.debug('Analyzing token: "%s"' % token)
            date_analysis = self.look_like_date_only(token)
            if date_analysis:
                current_operation = {}
                current_operation['date'] = date_analysis.groups()[0]
            else:
                date_desc_analysis = self.look_like_date_and_description(token)
                if date_desc_analysis:
                    current_operation = {}
                    current_operation['date'] = date_desc_analysis.groups()[0]
                    current_operation['label'] = date_desc_analysis.groups()[1]
                elif self.look_like_amount(token):
                    # we consider the amount is the last information we get for an operation
                    current_operation['amount'] = self.clean_amount(token)
                    if current_operation.get('label') is not None and current_operation.get('date') is not None:
                        self.logger.debug('Parsed operation: %s: %s: %s' % (current_operation['date'], current_operation['label'], current_operation['amount']))
                        operations.append(current_operation)
                        current_operation = {}
                else:
                    if current_operation.get('label') is not None:
                        current_operation['label'] = u'%s %s' % (current_operation['label'], token)
                    else:
                        current_operation['label'] = token
        # Step 3: yield adequate transactions
        index = start_index
        for op in operations[start_offset:]:
            self.logger.debug('will yield the following transaction with index %d: %s: %s: %s' % (index, op['date'], op['label'], op['amount']))
            transaction = Transaction(index)
            index += 1
            transaction.amount = op['amount']
            transaction.date = self.date_from_string(op['date'])
            transaction.raw = op['label']
            yield transaction
    def get_transfer_accounts(self, select_name):
        """
@ -219,6 +284,16 @@ class AccountsList(CragrBasePage):
            year = today.year
        return date(year, month, day)
    def clean_amount(self, amount):
        """
            Removes weird characters and converts to a Decimal.

            Commas become dots, then regular and non-breaking spaces are
            dropped. The leading "[-]digits.dd" part of what remains is
            converted; whatever follows it (e.g. a currency symbol) is ignored.
            Decimal(0) is returned when the cleaned text does not start with
            such a number.

            >>> clean_amount(u'1 000,00 $')
            Decimal('1000.00')
        """
        data = amount.replace(',', '.').replace(' ', '').replace(u'\xa0', '')
        # raw string: '\.' inside a plain literal is an invalid escape sequence
        matches = re.findall(r'^(-?[0-9]+\.[0-9]{2}).*$', data)
        return Decimal(matches[0]) if matches else Decimal(0)
    def look_like_account_owner(self, string):
        """ Returns a date object built from a given day/month pair. """
        result = re.match('^\s*(M\.|Mr|Mme|Mlle|Monsieur|Madame|Mademoiselle)', string, re.IGNORECASE)
@ -256,78 +331,3 @@ class AccountsList(CragrBasePage):
        result = re.search('^\s*((?:[012][0-9]|3[01])/(?:0[1-9]|1[012]))\s+(.+)\s*$', string)
        self.logger.debug('Does "%s" look like a date+description pair? %s', string, ('yes' if result else 'no'))
        return result
    def clean_amount(self, amount):
        """
            Removes weird characters and converts to a Decimal.

            Commas become dots, then regular and non-breaking spaces are
            dropped. The leading "[-]digits.dd" part of what remains is
            converted; whatever follows it (e.g. a currency symbol) is ignored.
            Decimal(0) is returned when the cleaned text does not start with
            such a number.

            >>> clean_amount(u'1 000,00 $')
            Decimal('1000.00')
        """
        data = amount.replace(',', '.').replace(' ', '').replace(u'\xa0', '')
        # raw string: '\.' inside a plain literal is an invalid escape sequence
        matches = re.findall(r'^(-?[0-9]+\.[0-9]{2}).*$', data)
        return Decimal(matches[0]) if matches else Decimal(0)
    def get_history(self, start_index=0, start_offset=0):
        """
            Returns the history of a specific account. Note that this function
            expects the current page to be the one dedicated to this history.
            start_index is the id used for the first created operation.
            start_offset allows ignoring the `n' first Transactions on the page.

            This is a generator: it yields one Transaction per parsed operation,
            with its amount, date and raw label set, and yields nothing at all
            when is_account_page() is false.
        """
        # tested on CA Lorraine, Paris, Toulouse
        # avoid parsing the page as an account-dedicated page if it is not the case
        if not self.is_account_page():
            return

        # Step 1: extract text tokens
        tokens = []
        token_extractor = TokenExtractor()
        for div in self.document.getiterator('div'):
            # Compare with ==: `in ('dv')` is a substring test on the plain
            # string 'dv' (the parentheses do not make a tuple), so it also
            # accepted the classes '', 'd' and 'v' -- i.e. every div that has
            # no class attribute at all.
            if div.attrib.get('class', '') == 'dv':
                self.logger.debug("Analyzing div %s", div)
                for token in token_extractor.extract_tokens(div):
                    self.logger.debug('Extracted text token: "%s"', token)
                    tokens.append(token)

        # Step 2: convert tokens into operations
        # Notes:
        # * the code below expects pieces of information to be in the date-label-amount order;
        #   could we achieve a heuristic smart enough to guess this order?
        # * unlike the former code, we parse every operation
        operations = []
        current_operation = {}
        for token in tokens:
            self.logger.debug('Analyzing token: "%s"', token)

            # a lone date starts a new operation
            date_analysis = self.look_like_date_only(token)
            if date_analysis:
                current_operation = {'date': date_analysis.groups()[0]}
                continue

            # a date followed by a description also starts a new operation
            date_desc_analysis = self.look_like_date_and_description(token)
            if date_desc_analysis:
                current_operation = {
                    'date': date_desc_analysis.groups()[0],
                    'label': date_desc_analysis.groups()[1],
                }
                continue

            if self.look_like_amount(token):
                # we consider the amount is the last information we get for an operation
                current_operation['amount'] = self.clean_amount(token)
                if current_operation.get('label') is not None and current_operation.get('date') is not None:
                    self.logger.debug('Parsed operation: %s: %s: %s', current_operation['date'], current_operation['label'], current_operation['amount'])
                    operations.append(current_operation)
                    current_operation = {}
                continue

            # anything else is (a continuation of) the label
            if current_operation.get('label') is not None:
                current_operation['label'] = u'%s %s' % (current_operation['label'], token)
            else:
                current_operation['label'] = token

        # Step 3: yield adequate transactions
        for index, op in enumerate(operations[start_offset:], start_index):
            self.logger.debug('will yield the following transaction with index %d: %s: %s: %s', index, op['date'], op['label'], op['amount'])
            transaction = Transaction(index)
            transaction.amount = op['amount']
            transaction.date = self.date_from_string(op['date'])
            transaction.raw = op['label']
            yield transaction