diff --git a/modules/amazon/__init__.py b/modules/amazon/__init__.py
new file mode 100644
index 00000000..2032e091
--- /dev/null
+++ b/modules/amazon/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Oleg Plakhotniuk
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+
+from .module import AmazonModule
+
+__all__ = ['AmazonModule']
diff --git a/modules/amazon/browser.py b/modules/amazon/browser.py
new file mode 100644
index 00000000..b9b70276
--- /dev/null
+++ b/modules/amazon/browser.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Oleg Plakhotniuk
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+
+from weboob.browser import LoginBrowser, URL, need_login
+from weboob.browser.exceptions import ServerError, HTTPNotFound
+from weboob.capabilities.base import Currency
+from weboob.capabilities.shop import OrderNotFound
+from weboob.exceptions import BrowserIncorrectPassword
+
+from .pages import HomePage, LoginPage, AmazonPage, HistoryPage, \
+ OrderOldPage, OrderNewPage
+
+
+__all__ = ['Amazon']
+
+
+class Amazon(LoginBrowser):
+ BASEURL = 'https://www.amazon.com'
+ home = URL(r'http://www\.amazon\.com/$', HomePage)
+ login = URL(r'/ap/signin/.*$', LoginPage)
+ history = URL(r'/gp/css/order-history.*$', HistoryPage)
+ order_old = URL(r'/gp/css/summary.*$',
+ r'/gp/css/summary/edit.html\?orderID=%\(order_id\)s',
+ OrderOldPage)
+ order_new = URL(r'/gp/css/summary.*$',
+ r'/gp/your-account/order-details.*$',
+ r'/gp/your-account/order-details\?orderID=%\(order_id\)s',
+ OrderNewPage)
+ unknown = URL(r'/.*$', AmazonPage)
+
+ def get_currency(self):
+ # Amazon uses only U.S. dollars.
+ return Currency.get_currency(u'$')
+
+ def get_order(self, id_):
+ order = self.to_order(id_).order()
+ if order:
+ return order
+ else:
+ raise OrderNotFound()
+
+ def iter_orders(self):
+ histRoot = self.to_history()
+ for histYear in histRoot.iter_years():
+ for order in histYear.iter_orders():
+ if order.order():
+ yield order.order()
+
+ def iter_payments(self, order):
+ return self.to_order(order.id).payments()
+
+ def iter_items(self, order):
+ return self.to_order(order.id).items()
+
+ @need_login
+ def to_history(self):
+ self.history.stay_or_go()
+ assert self.history.is_here()
+ return self.page
+
+ @need_login
+ def to_order(self, order_id):
+ """
+ Amazon updates its website in stages: they reroute a random part of
+ their users to new pages, and the rest to old ones.
+ """
+ if (not self.order_new.is_here() and not self.order_old.is_here()) \
+ or self.page.order_number() != order_id:
+ try:
+ self.order_new.go(order_id=order_id)
+ except HTTPNotFound:
+ self.order_old.go(order_id=order_id)
+ if (not self.order_new.is_here() and not self.order_old.is_here()) \
+ or self.page.order_number() != order_id:
+ raise OrderNotFound()
+ return self.page
+
+ def do_login(self):
+ self.session.cookies.clear()
+ self.home.go().to_login().login(self.username, self.password)
+ if not self.page.logged:
+ raise BrowserIncorrectPassword()
+
+ def location(self, *args, **kwargs):
+ """
+ Amazon throws 500 HTTP status code for apparently valid requests
+ from time to time. Requests eventually succeed after retrying.
+ """
+ for i in xrange(self.MAX_RETRIES):
+ try:
+ return super(Amazon, self).location(*args, **kwargs)
+ except ServerError as e:
+ pass
+ raise e
diff --git a/modules/amazon/favicon.png b/modules/amazon/favicon.png
new file mode 100644
index 00000000..884408f8
Binary files /dev/null and b/modules/amazon/favicon.png differ
diff --git a/modules/amazon/module.py b/modules/amazon/module.py
new file mode 100644
index 00000000..62717937
--- /dev/null
+++ b/modules/amazon/module.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Oleg Plakhotniuk
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+
+from weboob.capabilities.shop import CapShop
+from weboob.tools.backend import Module, BackendConfig
+from weboob.tools.value import ValueBackendPassword
+
+from .browser import Amazon
+
+
+__all__ = ['AmazonModule']
+
+
+class AmazonModule(Module, CapShop):
+ NAME = 'amazon'
+ MAINTAINER = u'Oleg Plakhotniuk'
+ EMAIL = 'olegus8@gmail.com'
+ VERSION = '1.1'
+ LICENSE = 'AGPLv3+'
+ DESCRIPTION = u'Amazon'
+ CONFIG = BackendConfig(
+ ValueBackendPassword('email', label='Username', masked=False),
+ ValueBackendPassword('password', label='Password'))
+ BROWSER = Amazon
+
+ def create_default_browser(self):
+ return self.create_browser(self.config['email'].get(),
+ self.config['password'].get())
+
+ def get_currency(self):
+ return self.browser.get_currency()
+
+ def get_order(self, id_):
+ return self.browser.get_order(id_)
+
+ def iter_orders(self):
+ return self.browser.iter_orders()
+
+ def iter_payments(self, order):
+ return self.browser.iter_payments(order)
+
+ def iter_items(self, order):
+ return self.browser.iter_items(order)
diff --git a/modules/amazon/pages.py b/modules/amazon/pages.py
new file mode 100644
index 00000000..166d0a85
--- /dev/null
+++ b/modules/amazon/pages.py
@@ -0,0 +1,336 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Oleg Plakhotniuk
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+from weboob.tools.capabilities.bank.transactions import \
+ AmericanTransaction as AmTr
+from weboob.capabilities.shop import Order, Payment, Item
+from weboob.browser.pages import HTMLPage, pagination, NextPage
+
+from datetime import datetime
+from decimal import Decimal
+import re
+
+
+class AmazonPage(HTMLPage):
+ @property
+ def logged(self):
+ return bool(self.doc.xpath(u'//*[contains(text(),"Sign Out")]'))
+
+
+class HomePage(AmazonPage):
+ def to_login(self):
+ url1 = self.doc.xpath('//a[@id="nav-link-yourAccount"]/@href')
+ url2 = self.doc.xpath('//a[@id="nav-your-account"]/@href')
+ self.browser.location((url1 or url2)[0])
+ return self.browser.page
+
+
+class LoginPage(AmazonPage):
+ def login(self, email, password):
+ form = self.get_form(name='signIn')
+ form['email'] = email
+ form['password'] = password
+ form.submit()
+
+
+class HistoryPage(AmazonPage):
+ def iter_years(self):
+ for year in self.opt_years():
+ yield self.to_year(year)
+
+ @pagination
+ def iter_orders(self):
+ for id_ in self.doc.xpath(
+ u'//span[contains(text(),"Order #")]/../span[2]/text()'):
+ yield self.browser.to_order(id_.strip())
+ for next_ in self.doc.xpath(u'//ul[@class="a-pagination"]'
+ u'//a[contains(text(),"Next")]/@href'):
+ raise NextPage(next_)
+
+ def to_year(self, year):
+ form = self.get_form('//form[contains(@class,"time-period-chooser")]')
+ form['orderFilter'] = [year]
+ form.submit()
+ return self.browser.page
+
+ def opt_years(self):
+ return [x for x in self.doc.xpath(
+ '//select[@name="orderFilter"]/option/@value'
+ ) if x.startswith('year-')]
+
+
+class OrderNewPage(AmazonPage):
+ is_here = u'//*[contains(text(),"Ordered on")]'
+
+ def order(self):
+ # Reports only fully shipped and delivered orders, because they have
+ # finalized payment amounts.
+ # Payment for not yet shipped orders may change, and is not always
+ # available.
+ for s in [u'Not Yet Shipped', u'Preparing for Shipment',
+ u'Shipping now']:
+ if self.doc.xpath(u'//*[contains(text(),"%s")]' % s):
+ return None
+
+ order = Order(id=self.order_number())
+ order.date = self.order_date()
+ order.tax = self.tax()
+ order.discount = self.discount()
+ order.shipping = self.shipping()
+ return order
+
+ def order_date(self):
+ return datetime.strptime(
+ re.match('.*Ordered on ([^ ]+ [0-9]+, [0-9]+) .*',
+ self.date_num()).group(1),
+ '%B %d, %Y')
+
+ def order_number(self):
+ m = re.match('.*Order# ([^ ]+) .*', self.date_num())
+ if m:
+ return m.group(1)
+
+ def payments(self):
+ if self.gift():
+ pmt = Payment()
+ pmt.date = self.order_date()
+ pmt.method = u'GIFT CARD'
+ pmt.amount = -self.gift()
+ yield pmt
+ for trans in self.transactions():
+ yield trans
+
+ def date_num(self):
+ return u' '.join(self.doc.xpath(
+ '//div[contains(text(),"Ordered on")]/text()')).replace('\n', '')
+
+ def tax(self):
+ return self.amount(u'Estimated tax to be collected')
+
+ def shipping(self):
+ return self.amount(u'Free shipping', u'Free Shipping',
+ u'Shipping & Handling')
+
+ def discount(self):
+ return self.amount(u'Promotion applied', u'Promotion Applied',
+ u'Subscribe & Save', u'Your Coupon Savings')
+
+ def gift(self):
+ return self.amount(u'Gift Card Amount')
+
+ def amount(self, *names):
+ return Decimal(sum(AmTr.decimal_amount(amount.strip())
+ for n in names for amount in self.doc.xpath(
+ '//span[contains(text(),"%s:")]/../..//span[2]/text()' % n)))
+
+ def transactions(self):
+ for row in self.doc.xpath('//span[contains(text(),"Transactions")]'
+ '/../../div/div'):
+ text = row.text_content().strip().replace('\n', ' ')
+ if u'Items shipped:' not in text:
+ continue
+ date, method, amount = re.match(
+ '.* ' '([A-z]+ [0-9]+, [0-9]+)'
+ '[ -]+' '([A-z][^:]+)'
+ ': +' '([^ ]+)', text).groups()
+ date = datetime.strptime(date, '%B %d, %Y')
+ method = method.replace(u'ending in ', u'').upper()
+ amount = AmTr.decimal_amount(amount)
+ pmt = Payment()
+ pmt.date = date
+ pmt.method = method
+ pmt.amount = amount
+ yield pmt
+
+ def items(self):
+ for item in self.doc.xpath('//div[contains(@class,"a-box shipment")]'
+ '/div/div/div/div/div/div'):
+ url = (item.xpath(u'*//a[contains(@href,"/gp/product")]/@href') +
+ [u''])[0]
+ label = u''.join(item.xpath(
+ '*//a[contains(@href,"/gp/product")]/text()')).strip()
+ price = u''.join(x.strip() for x in item.xpath(
+ '*//span[contains(text(),"$")]/text()')
+ if x.strip().startswith('$'))
+ price = AmTr.decimal_amount(price)
+ multi = re.match(u'([0-9]+) of (.*)', label)
+ if multi:
+ amount, label = multi.groups()
+ price *= Decimal(amount)
+ if url:
+ url = unicode(self.browser.BASEURL) + \
+ re.match(u'(/gp/product/.*)/ref=.*', url).group(1)
+ if label and price:
+ itm = Item()
+ itm.label = label
+ itm.url = url
+ itm.price = price
+ yield itm
+
+
+class OrderOldPage(AmazonPage):
+ is_here = u'//*[contains(text(),"Amazon.com order number")]'
+
+ def order(self):
+ # Reports only fully shipped and delivered orders, because they have
+ # finalized payment amounts.
+ # Payment for not yet shipped orders may change, and are not always
+ # available.
+ for s in [u'Not Yet Shipped', u'Preparing for Shipment',
+ u'Shipping now']:
+ if self.doc.xpath(u'//b[contains(text(),"%s")]' % s):
+ return None
+
+ order = Order(id=self.order_number())
+ order.date = self.order_date()
+ order.tax = self.tax()
+ order.discount = self.discount()
+ order.shipping = self.shipping()
+ return order
+
+ def order_date(self):
+ return datetime.strptime(u' '.join(self.doc.xpath(
+ u'//b[contains(text(),"Order Placed")]/../text()')).strip(),
+ '%B %d, %Y')
+
+ def order_number(self):
+ return u' '.join(self.doc.xpath(
+ u'//td/b[contains(text(),"Amazon.com order number")]/../text()')
+ ).strip()
+
+ def tax(self):
+ return self.sum_amounts(u'Sales Tax:')
+
+ def discount(self):
+ return self.sum_amounts(u'Subscribe & Save:', u'Promotion applied:',
+ u'Promotion Applied:', u'Your Coupon Savings:')
+
+ def shipping(self):
+ return self.sum_amounts(u'Shipping & Handling:', u'Free shipping:',
+ u'Free Shipping:')
+
+ def payments(self):
+ for shmt in self.shipments():
+ gift = self.gift(shmt)
+ if gift:
+ pmt = Payment()
+ pmt.date = self.order_date()
+ pmt.method = u'GIFT CARD'
+ pmt.amount = -gift
+ yield pmt
+ transactions = list(self.transactions())
+ if transactions:
+ for t in transactions:
+ yield t
+ else:
+ for method in self.paymethods():
+ pmt = Payment()
+ pmt.date = self.order_date()
+ pmt.method = method
+ pmt.amount = self.grand_total()
+ yield pmt
+ break
+
+ def shipments(self):
+ for cue in (u'Shipment #', u'Subscribe and Save Shipment'):
+ for shmt in self.doc.xpath('//b[contains(text(),"%s")]' % cue):
+ yield shmt
+
+ def items(self):
+ for shmt in self.shipments():
+ root = shmt.xpath(u'../../../../../../../..'
+ u'//b[text()="Items Ordered"]')[0]
+ for item in root.xpath('../../../tr')[1:]:
+ count = url = label = None
+ for div in item.xpath('*//div'):
+ m = re.match(u'^\s*(\d+)\s*of:(.*)$', div.text,
+ re.MULTILINE + re.DOTALL)
+ if not m:
+ continue
+ count = Decimal(m.group(1).strip())
+ label = unicode(m.group(2).strip())
+ if label:
+ url = u''
+ else:
+ a = div.xpath('*//a[contains(@href,"/gp/product")]')[0]
+ url = unicode(a.attrib['href'])
+ label = unicode(a.text.strip())
+ price1 = item.xpath('*//div')[-1].text.strip()
+ price = count * AmTr.decimal_amount(price1)
+
+ itm = Item()
+ itm.label = label
+ itm.url = url
+ itm.price = price
+ yield itm
+
+ def sum_amounts(self, *names):
+ return sum(self.amount(shmt,x) for shmt in self.shipments()
+ for x in names)
+
+ def amount(self, shmt, name):
+ for root in shmt.xpath(u'../../../../../../../..'
+ u'//td[text()="Item(s) Subtotal: "]/../..'):
+ for node in root.xpath(u'tr/td[text()="%s"]' % name):
+ return AmTr.decimal_amount(
+ node.xpath('../td')[-1].text.strip())
+ for node in root.xpath(u'tr/td/b[text()="%s"]' % name):
+ return AmTr.decimal_amount(
+ node.xpath('../../td/b')[-1].text.strip())
+ return Decimal(0)
+
+ def gift(self, shmt):
+ return self.amount(shmt, u'Gift Card Amount:')
+
+ def paymethods(self):
+ root = self.doc.xpath('//b[text()="Payment Method: "]/..')[0]
+ text = root.text_content().strip()
+ while text:
+ for pattern in [
+ u'^.*Payment Method:',
+ u'^([^\n]+)\n +\| Last digits: +([0-9]+)\n',
+ u'^Gift Card\n', # Skip gift card.
+ u'^Billing address.*$']:
+ match = re.match(pattern, text, re.DOTALL+re.MULTILINE)
+ if match:
+ text = text[match.end():].strip()
+ if match.groups():
+ yield u' '.join(match.groups()).upper()
+ break
+ else:
+ break
+
+ def transactions(self):
+ for tr in self.doc.xpath(
+ u'//div[contains(b,"Credit Card transactions")]'
+ u'/following-sibling::table[1]/tr'):
+ label, date = tr.xpath('td[1]/text()')[0].strip().split(u'\xa0')
+ amount = tr.xpath('td[2]/text()')[0].strip()
+ date = datetime.strptime(date, '%B %d, %Y:')
+ method = label.replace(u'ending in ', u'')[:-1].upper()
+ amount = AmTr.decimal_amount(amount)
+ pmt = Payment()
+ pmt.date = date
+ pmt.method = method
+ pmt.amount = amount
+ yield pmt
+
+ def grand_total(self):
+ return AmTr.decimal_amount(self.doc.xpath(
+ '//td[b="Grand Total:"]/following-sibling::td[1]/b')[0].text)
diff --git a/modules/amazon/test.py b/modules/amazon/test.py
new file mode 100644
index 00000000..f0b8f3d7
--- /dev/null
+++ b/modules/amazon/test.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Oleg Plakhotniuk
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+from weboob.tools.test import BackendTest
+
+
+class AmazonTest(BackendTest):
+ MODULE = 'amazon'
+
+ def test_history(self):
+ """
+ Test that at least one item was ordered in the whole history.
+ """
+ b = self.backend
+ items = (i for o in b.iter_orders() for i in b.iter_items(o))
+ item = next(items, None)
+ self.assertNotEqual(item, None)