[amazon] Add French Support and Shopoob compatibilities

This commit is contained in:
Kitof 2015-02-18 13:04:31 +01:00 committed by Romain Bignon
commit 2012ac3690
5 changed files with 453 additions and 10 deletions

11
modules/amazon/browser.py Normal file → Executable file
View file

@ -22,25 +22,25 @@ from requests.exceptions import Timeout
from weboob.browser import LoginBrowser, URL, need_login
from weboob.browser.exceptions import ServerError, HTTPNotFound
from weboob.capabilities.base import Currency
from weboob.capabilities.shop import OrderNotFound
from weboob.exceptions import BrowserIncorrectPassword
from .pages import HomePage, LoginPage, AmazonPage, HistoryPage, \
OrderOldPage, OrderNewPage
__all__ = ['Amazon']
class Amazon(LoginBrowser):
BASEURL = 'https://www.amazon.com'
MAX_RETRIES = 10
home = URL(r'http://www\.amazon\.com/$', HomePage)
CURRENCY = u'$'
home = URL(r'/$', r'http://www.amazon.com/$', HomePage)
login = URL(r'/ap/signin/.*$', LoginPage)
history = URL(r'/gp/css/order-history.*$', HistoryPage)
order_old = URL(r'/gp/css/summary.*$',
r'/gp/css/summary/edit.html\?orderID=%\(order_id\)s',
r'/gp/digital/your-account/order-summary.html.*$',
r'/gp/digital/your-account/orderPe-summary.html\?orderID=%\(order_id\)s',
OrderOldPage)
order_new = URL(r'/gp/css/summary.*$',
r'/gp/your-account/order-details.*$',
@ -49,8 +49,7 @@ class Amazon(LoginBrowser):
unknown = URL(r'/.*$', AmazonPage)
def get_currency(self):
# Amazon uses only U.S. dollars.
return Currency.get_currency(u'$')
return self.CURRENCY
def get_order(self, id_):
order = self.to_order(id_).order()

View file

View file

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2015 Christophe Lampin
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import URL
from ..browser import Amazon
from .pages import HomePage, LoginPage, AmazonPage, HistoryPage, \
OrderOldPage, OrderNewPage
__all__ = ['AmazonFR']
class AmazonFR(Amazon):
    """Browser for the French storefront (www.amazon.fr).

    Everything is inherited from ``Amazon``; this subclass only overrides the
    base URL, the currency symbol and the URL-to-page routing table so that
    the page classes imported by this module are used.
    """

    BASEURL = 'https://www.amazon.fr'
    # This used to be an empty string, which left the browser without any
    # currency symbol to report. Written as an escape sequence so the value
    # does not depend on how this file is encoded.
    # NOTE(review): the French pages parse amounts written as "EUR ...", hence
    # the euro sign -- confirm this is the symbol callers expect.
    CURRENCY = u'\u20ac'

    home = URL(r'/$', r'.*/homepage\.html.*', HomePage)
    login = URL(r'/ap/signin/.*$', LoginPage)
    history = URL(r'/gp/css/order-history.*$', HistoryPage)
    # NOTE(review): "orderPe-summary" in the last pattern below looks like a
    # typo for "order-summary" (compare with the pattern just above it);
    # left untouched, please confirm.
    order_old = URL(r'/gp/css/summary.*$',
                    r'/gp/css/summary/edit.html\?orderID=%\(order_id\)s',
                    r'/gp/digital/your-account/order-summary.html.*$',
                    r'/gp/digital/your-account/orderPe-summary.html\?orderID=%\(order_id\)s',
                    OrderOldPage)
    order_new = URL(r'/gp/css/summary.*$',
                    r'/gp/your-account/order-details.*$',
                    r'/gp/your-account/order-details\?orderID=%\(order_id\)s',
                    OrderNewPage)
    # Catch-all: any other path is handled by the generic page class.
    unknown = URL(r'/.*$', AmazonPage)

381
modules/amazon/fr/pages.py Executable file
View file

@ -0,0 +1,381 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Oleg Plakhotniuk
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.shop import Order, Payment, Item
from weboob.browser.pages import HTMLPage, pagination, NextPage
from weboob.capabilities.base import empty
from datetime import datetime
from decimal import Decimal
import re
# French month names in calendar order (index 0 is January). Dates are
# translated to numeric months by hand with this list, which avoids having to
# rely on a French locale.
FRENCH_MONTHS = [u'janvier', u'février', u'mars', u'avril', u'mai', u'juin', u'juillet', u'août', u'septembre', u'octobre', u'novembre', u'décembre']
class AmazonPage(HTMLPage):
    """Common base class for the amazon.fr pages of this module."""

    @property
    def logged(self):
        """Tell whether the document contains the French log-out text."""
        logout_nodes = self.doc.xpath(
            u'//*[contains(text(),"Déconnectez-vous")]')
        return bool(logout_nodes)
class HomePage(AmazonPage):
    """Landing page, used as the starting point to reach the login form."""

    def to_login(self):
        """Follow the account link and return the page the browser lands on.

        Two link ids are looked up; the first one that yields a result is
        used. An IndexError is raised when neither is present.
        """
        primary = self.doc.xpath('//a[@id="nav-link-yourAccount"]/@href')
        fallback = self.doc.xpath('//a[@id="nav-your-account"]/@href')
        hrefs = primary if primary else fallback
        target = hrefs[0]
        self.browser.location(target)
        return self.browser.page
class LoginPage(AmazonPage):
    """Page holding the sign-in form."""

    def login(self, email, password):
        """Fill the form named ``signIn`` with the credentials and submit it."""
        signin_form = self.get_form(name='signIn')
        for field, value in (('email', email), ('password', password)):
            signin_form[field] = value
        signin_form.submit()
class HistoryPage(AmazonPage):
    """Order history listing, browsable year by year."""

    forced_encoding = True
    ENCODING = 'UTF-8'

    def iter_years(self):
        """Yield, for each available year filter, the page obtained for it."""
        available = self.opt_years()
        for option in available:
            yield self.to_year(option)

    @pagination
    def iter_orders(self):
        """Yield the order of every order number listed on this page.

        Once the listing is exhausted, ``NextPage`` is raised with the first
        "Suivante" link of the pagination block, if there is one.
        """
        order_ids = self.doc.xpath(
            u'//span[contains(text(),"N° de commande")]/../span[2]/text()')
        for raw_id in order_ids:
            yield self.browser.to_order(raw_id.strip())
        next_links = self.doc.xpath(u'//ul[@class="a-pagination"]'
                                    u'//a[contains(text(),"Suivante")]/@href')
        if next_links:
            raise NextPage(next_links[0])

    def to_year(self, year):
        """Submit the period chooser form for ``year``; return the new page."""
        chooser = self.get_form(
            '//form[contains(@class,"time-period-chooser")]')
        chooser['orderFilter'] = [year]
        chooser.submit()
        return self.browser.page

    def opt_years(self):
        """Return the ``orderFilter`` option values that start with "year-"."""
        selected = []
        for value in self.doc.xpath(
                '//select[@name="orderFilter"]/option/@value'):
            if value.startswith('year-'):
                selected.append(value)
        return selected
class OrderPage(AmazonPage):
    """Helpers shared by the two order page layouts."""

    def shouldSkip(self):
        """Tell whether the order must be left out of the report.

        Only fully shipped and delivered orders are reported, because their
        payment amounts are final. Payment for orders that are not shipped
        yet may still change and is not always available.
        """
        pending_markers = [u'En préparation pour expédition']
        for marker in pending_markers:
            if self.doc.xpath(u'//*[contains(text(),"%s")]' % marker):
                return True
        return False

    def decimal_amount(self, amount):
        """Parse the number following "EUR " in ``amount`` into a Decimal.

        The comma is taken as the decimal separator. None is returned when
        the text holds no such amount.
        """
        found = re.match(u'.*EUR ([,0-9]+).*', amount)
        if found is None:
            return None
        digits = found.group(1)
        return Decimal(digits.replace(",", "."))

    def month_to_int(self, text):
        """Replace every French month name in ``text`` by its 1-based number."""
        for number, month in enumerate(FRENCH_MONTHS, 1):
            text = text.replace(month, str(number))
        return text
class OrderNewPage(OrderPage):
    """Order details page of amazon.fr, newer layout.

    The page is recognised through the ``is_here`` XPath and its encoding is
    forced to ISO-8859-15.
    """

    forced_encoding = True
    ENCODING = 'ISO-8859-15'
    is_here = u'//*[contains(text(),"Commandé le")]'

    def order(self):
        """Return the ``Order`` described by the page.

        None is returned when ``shouldSkip()`` says the order must be ignored.
        """
        if self.shouldSkip():
            return None
        order = Order(id=self.order_number())
        order.date = self.order_date()
        order.tax = self.tax()
        order.discount = self.discount()
        order.shipping = self.shipping()
        order.total = self.grand_total()
        return order

    def order_date(self):
        """Return the order date parsed from the page header as a datetime.

        The month name is first turned into a number so that parsing does
        not depend on the locale.
        """
        return datetime.strptime(
            re.match(u'.*Commandé le ([0-9]+ [0-9]+ [0-9]+) .*',
                     self.month_to_int(self.date_num())).group(1),
            '%d %m %Y')

    def order_number(self):
        """Return the order number from the page header, or None."""
        m = re.match(u'.*N° de commande : +([^ ]+) .*', self.date_num())
        if m:
            return m.group(1)

    def payments(self):
        """Yield the ``Payment`` objects of the order.

        A gift card payment (negative amount) comes first when a gift amount
        is found. Then the listed transactions are yielded; when there are
        none, a single payment for the grand total is built from the first
        payment method.
        """
        # Evaluate the gift amount once: it requires walking the document.
        gift = self.gift()
        if gift:
            pmt = Payment()
            pmt.date = self.order_date()
            pmt.method = u'GIFT CARD'
            pmt.amount = -gift
            yield pmt
        transactions = list(self.transactions())
        if transactions:
            for t in transactions:
                yield t
        else:
            for method in self.paymethods():
                pmt = Payment()
                pmt.date = self.order_date()
                pmt.method = method
                pmt.amount = self.grand_total()
                yield pmt
                # Only the first payment method is reported.
                break

    def paymethods(self):
        """Yield "<image alt text> <digits>" for each payment method block."""
        for root in self.doc.xpath(u'//h5[contains(text(),"Méthode de paiement")]'):
            alt = root.xpath('../div/img/@alt')[0]
            span = root.xpath('../div/span/text()')[0]
            digits = re.match(r'[^0-9]*([0-9]+)[^0-9]*', span).group(1)
            yield u'%s %s' % (alt, digits)

    def grand_total(self):
        """Return the amount displayed next to "Montant total TTC"."""
        return self.decimal_amount(self.doc.xpath(
            '//span[contains(text(),"Montant total TTC")]/..'
            '/following-sibling::div[1]/span/text()')[0].strip())

    def date_num(self):
        """Return the header text (order date and number) on a single line."""
        return u' '.join(self.doc.xpath(
            '//span[@class="order-date-invoice-item"]/text()'
        )).replace('\n', '')

    def tax(self):
        """Return the sum of the amounts labelled " TVA"."""
        return self.amount(u' TVA')

    def shipping(self):
        """Return the sum of the amounts labelled "Livraison :"."""
        return self.amount(u'Livraison :')

    def discount(self):
        """Return the sum of the discount amounts.

        NOTE(review): these labels are English whereas the other labels of
        this class are French -- confirm they can match on amazon.fr.
        """
        return self.amount(u'Promotion applied', u'Promotion Applied',
                           u'Subscribe & Save', u'Your Coupon Savings',
                           u'Lightning Deal')

    def gift(self):
        """Return the gift card amount.

        NOTE(review): English label, same concern as in ``discount()``.
        """
        return self.amount(u'Gift Card Amount')

    def amount(self, *names):
        """Return the Decimal sum of the amounts found for the given labels.

        For each label, the text of the second span found around a span
        containing that label is parsed with ``decimal_amount``. Texts that
        hold no "EUR" amount are ignored instead of breaking the sum with a
        None value.
        """
        total = Decimal(0)
        for name in names:
            for text in self.doc.xpath(
                    '(//span[contains(text(),"%s")]/../..//span)[2]/text()'
                    % name):
                value = self.decimal_amount(text.strip())
                if value is not None:
                    total += value
        return total

    def transactions(self):
        """Yield a ``Payment`` for each shipment line of the transactions block."""
        for row in self.doc.xpath('//span[contains(text(),"Transactions")]'
                                  '/../../div/div'):
            text = row.text_content().strip().replace('\n', ' ')
            if u'Expédition' not in text:
                continue
            date, method, amount = re.match(
                '.* ' '([0-9]+ [^ ]+ [0-9]+)'
                '[ -]+' '([A-z][^:]+)'
                ': +' '(EUR [^ ]+)', text).groups()
            date = datetime.strptime(self.month_to_int(date), '%d %m %Y')
            method = method.replace(u'finissant par ', u'').upper()
            amount = self.decimal_amount(amount)
            pmt = Payment()
            pmt.date = date
            pmt.method = method
            pmt.amount = amount
            yield pmt

    def items(self):
        """Yield an ``Item`` for every product that has a label and a price.

        A label of the form "<n> de <label>" is read as a quantity: the
        price is multiplied by <n> and the prefix is removed from the label.
        Product links are made absolute and their "/ref=..." suffix removed.
        """
        for item in self.doc.xpath('//div[contains(@class,"a-box shipment")]'
                                   '/div/div/div/div/div/div'):
            url = (item.xpath(u'*//a[contains(@href,"/gp/product")]/@href') +
                   [u''])[0]
            label = u''.join(item.xpath(
                '*//a[contains(@href,"/gp/product")]/text()')).strip()
            price = u''.join(x.strip() for x in item.xpath(
                '*//span[contains(text(),"EUR")]/text()')
                if x.strip().startswith('EUR'))
            price = self.decimal_amount(price)
            multi = re.match(u'([0-9]+) de (.*)', label)
            if multi:
                amount, label = multi.groups()
                # The price is None when no amount could be parsed; such an
                # entry is dropped by the final check below, so it must not
                # be multiplied here.
                if price is not None:
                    price *= Decimal(amount)
            if url:
                trimmed = re.match(u'(/gp/product/.*)/ref=.*', url)
                if trimmed:
                    url = unicode(self.browser.BASEURL) + trimmed.group(1)
                elif url.startswith(u'/'):
                    # Relative link without a "/ref=" suffix: only make it
                    # absolute.
                    url = unicode(self.browser.BASEURL) + url
            if label and price:
                itm = Item()
                itm.label = label
                itm.url = url
                itm.price = price
                yield itm
class OrderOldPage(OrderPage):
    """Order summary page of amazon.fr, older layout.

    The page is recognised through the ``is_here`` XPath and its encoding is
    forced to ISO-8859-15.

    NOTE(review): several cues used below ("Shipment #", "Payment Method: ",
    "Credit Card transactions", "of:", ...) are English whereas others are
    French -- confirm they match the markup actually served by amazon.fr.
    """

    forced_encoding = True
    ENCODING = 'ISO-8859-15'
    is_here = u'//*[contains(text(),"Amazon.fr numéro de commande")]'

    def order(self):
        """Return the ``Order`` described by the page.

        None is returned when ``shouldSkip()`` says the order must be ignored.
        Amounts that ``empty()`` reports as missing are replaced by zero.
        """
        if self.shouldSkip():
            return None

        def or_zero(value):
            # Each amount is computed once by the caller and converted here.
            return Decimal(0) if empty(value) else Decimal(value)

        order = Order(id=self.order_number())
        order.date = self.order_date()
        order.tax = or_zero(self.tax())
        order.discount = or_zero(self.discount())
        order.shipping = or_zero(self.shipping())
        order.total = or_zero(self.grand_total())
        return order

    def order_date(self):
        """Return the date following "Commande numérique :" as a datetime.

        The French month name is replaced by its number before parsing.
        """
        date_str = self.doc.xpath(u'//b[contains(text(),"Commande numérique")]')[0].text
        month_str = re.match(u'.*Commande numérique : [0-9]+ ([^ ]+) [0-9]+.*', date_str).group(1)
        return datetime.strptime(
            re.match(u'.*Commande numérique : ([0-9]+ [0-9]+ [0-9]+).*',
                     date_str.replace(month_str, str(FRENCH_MONTHS.index(month_str) + 1))).group(1),
            '%d %m %Y')

    def order_number(self):
        """Return the text found next to the order number label."""
        num_com = u' '.join(self.doc.xpath(
            u'//b[contains(text(),"Amazon.fr numéro de commande")]/../text()')
        ).strip()
        return num_com

    def tax(self):
        """Return the tax amount summed over all shipments."""
        return self.sum_amounts(u'TVA:')

    def discount(self):
        """Return the discount amounts summed over all shipments."""
        return self.sum_amounts(u'Subscribe & Save:', u'Promotion applied:',
                                u'Promotion Applied:', u'Your Coupon Savings:')

    def shipping(self):
        """Return the shipping amounts summed over all shipments."""
        return self.sum_amounts(u'Shipping & Handling:', u'Free shipping:',
                                u'Free Shipping:')

    def payments(self):
        """Yield the ``Payment`` objects of the order.

        One gift card payment (negative amount) is yielded per shipment that
        has a gift amount. Then the listed transactions are yielded; when
        there are none, a single payment for the grand total is built from
        the first payment method.
        """
        for shmt in self.shipments():
            gift = self.gift(shmt)
            if gift:
                pmt = Payment()
                pmt.date = self.order_date()
                pmt.method = u'GIFT CARD'
                pmt.amount = -gift
                yield pmt
        transactions = list(self.transactions())
        if transactions:
            for t in transactions:
                yield t
        else:
            for method in self.paymethods():
                pmt = Payment()
                pmt.date = self.order_date()
                pmt.method = method
                pmt.amount = self.grand_total()
                yield pmt
                # Only the first payment method is reported.
                break

    def shipments(self):
        """Yield the <b> elements whose text contains a shipment cue."""
        for cue in (u'Shipment #', u'Subscribe and Save Shipment'):
            for shmt in self.doc.xpath('//b[contains(text(),"%s")]' % cue):
                yield shmt

    def items(self):
        """Yield an ``Item`` for each parseable product row of each shipment.

        A row is read from the div whose text looks like "<count> of:<label>";
        the price is the amount of the row's last div multiplied by the count.
        Rows for which no count or no price can be read are skipped.
        """
        for shmt in self.shipments():
            root = shmt.xpath(u'../../../../../../../..'
                              u'//b[text()="Articles commandés"]')[0]
            # The first row is skipped.
            for item in root.xpath('../../../tr')[1:]:
                count = url = label = None
                divs = item.xpath('*//div')
                for div in divs:
                    # A div holding only child elements has no text at all.
                    if div.text is None:
                        continue
                    m = re.match(u'^\\s*(\\d+)\\s*of:(.*)$', div.text,
                                 re.MULTILINE + re.DOTALL)
                    if not m:
                        continue
                    count = Decimal(m.group(1).strip())
                    label = unicode(m.group(2).strip())
                    if label:
                        url = u''
                    else:
                        # No inline label: take label and URL from the link.
                        a = div.xpath('*//a[contains(@href,"/gp/product")]')[0]
                        url = unicode(a.attrib['href'])
                        label = unicode(a.text.strip())
                if count is None:
                    # No div of this row described a product.
                    continue
                price1 = (divs[-1].text or u'').strip()
                unit_price = self.decimal_amount(price1)
                if unit_price is None:
                    # The last div does not hold an "EUR" amount.
                    continue
                itm = Item()
                itm.label = label
                itm.url = url
                itm.price = count * unit_price
                yield itm

    def sum_amounts(self, *names):
        """Return the sum of ``amount(shipment, name)`` over shipments and names."""
        return sum(self.amount(shmt, x) for shmt in self.shipments()
                   for x in names)

    def amount(self, shmt, name):
        """Return the amount labelled ``name`` in the totals of ``shmt``.

        The label is looked up in plain cells first, then in bold cells.
        Decimal(0) is returned when the label is not found.
        """
        for root in shmt.xpath(u'../../../../../../../..'
                               u'//td[text()="Sous-total articles: "]/../..'):
            for node in root.xpath(u'tr/td[text()="%s"]' % name):
                return self.decimal_amount(
                    node.xpath('../td')[-1].text.strip())
            for node in root.xpath(u'tr/td/b[text()="%s"]' % name):
                return self.decimal_amount(
                    node.xpath('../../td/b')[-1].text.strip())
        return Decimal(0)

    def gift(self, shmt):
        """Return the gift card amount of the given shipment."""
        return self.amount(shmt, u'Gift Card Amount:')

    def paymethods(self):
        """Yield the payment methods as upper-case "<name> <last digits>".

        The text of the payment block is consumed from the start, one
        pattern at a time, until nothing matches any more.
        """
        root = self.doc.xpath('//b[text()="Payment Method: "]/..')
        if len(root) == 0:
            return
        root = root[0]
        text = root.text_content().strip()
        while text:
            for pattern in [
                    u'^.*Payment Method:',
                    u'^([^\n]+)\n +\\| Last digits: +([0-9]+)\n',
                    u'^Gift Card\n',  # Skip gift card.
                    u'^Billing address.*$']:
                match = re.match(pattern, text, re.DOTALL+re.MULTILINE)
                if match:
                    text = text[match.end():].strip()
                    if match.groups():
                        yield u' '.join(match.groups()).upper()
                    break
            else:
                # No pattern matched the remaining text: stop.
                break

    def transactions(self):
        """Yield a ``Payment`` for each row of the card transactions table.

        NOTE(review): the date is parsed with '%B %d, %Y:', i.e. an English
        style date whose month name depends on the locale -- confirm against
        a real amazon.fr page.
        """
        for tr in self.doc.xpath(
                u'//div[contains(b,"Credit Card transactions")]'
                u'/following-sibling::table[1]/tr'):
            # Label and date are separated by a non-breaking space.
            label, date = tr.xpath('td[1]/text()')[0].strip().split(u'\xa0')
            amount = tr.xpath('td[2]/text()')[0].strip()
            date = datetime.strptime(date, '%B %d, %Y:')
            method = label.replace(u'ending in ', u'')[:-1].upper()
            amount = self.decimal_amount(amount)
            pmt = Payment()
            pmt.date = date
            pmt.method = method
            pmt.amount = amount
            yield pmt

    def grand_total(self):
        """Return the amount of the "Total pour cette commande" cell."""
        return self.decimal_amount(self.doc.xpath(
            u'//td[contains(b,"Total pour cette commande")]')[0].text)

27
modules/amazon/module.py Normal file → Executable file
View file

@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Oleg Plakhotniuk
#
@ -18,11 +18,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.shop import CapShop
from weboob.capabilities.shop import CapShop, Order
from weboob.tools.backend import Module, BackendConfig
from weboob.tools.value import ValueBackendPassword
from weboob.tools.value import Value, ValueBackendPassword
from weboob.tools.ordereddict import OrderedDict
from .browser import Amazon
from .fr.browser import AmazonFR
__all__ = ['AmazonModule']
@ -33,12 +35,24 @@ class AmazonModule(Module, CapShop):
VERSION = '1.1'
LICENSE = 'AGPLv3+'
DESCRIPTION = u'Amazon'
website_choices = OrderedDict([(k, u'%s (%s)' % (v, k)) for k, v in sorted({
'www.amazon.com': u'Amazon.com',
'www.amazon.fr': u'Amazon France',
}.iteritems())])
BROWSERS = {
'www.amazon.com': Amazon,
'www.amazon.fr': AmazonFR,
}
CONFIG = BackendConfig(
Value('website', label=u'Website', choices=website_choices, default='www.amazon.com'),
ValueBackendPassword('email', label='Username', masked=False),
ValueBackendPassword('password', label='Password'))
BROWSER = Amazon
def create_default_browser(self):
    """Create the browser matching the configured website.

    The browser class is picked from ``BROWSERS`` using the ``website``
    configuration value and stored in ``self.BROWSER``; the browser is then
    created with the configured email and password.
    """
    website = self.config['website'].get()
    self.BROWSER = self.BROWSERS[website]
    email = self.config['email'].get()
    password = self.config['password'].get()
    return self.create_browser(email, password)
@ -49,10 +63,15 @@ class AmazonModule(Module, CapShop):
return self.browser.get_order(id_)
def iter_orders(self):
    """Return what the browser's ``iter_orders()`` returns."""
    return self.browser.iter_orders()
def iter_payments(self, order):
    """Return the payments of ``order``.

    ``order`` may be an ``Order`` or anything accepted by ``get_order()``,
    in which case the order is fetched first.
    """
    if isinstance(order, Order):
        resolved = order
    else:
        resolved = self.get_order(order)
    return self.browser.iter_payments(resolved)
def iter_items(self, order):
    """Return the items of ``order``.

    ``order`` may be an ``Order`` or anything accepted by ``get_order()``,
    in which case the order is fetched first.
    """
    if isinstance(order, Order):
        resolved = order
    else:
        resolved = self.get_order(order)
    return self.browser.iter_items(resolved)