From 456271a2bda32a667c89430ad4075286243cc860 Mon Sep 17 00:00:00 2001 From: Oleg Plakhotniuk Date: Sat, 29 Nov 2014 12:02:06 -0600 Subject: [PATCH] Amazon online store module (http://www.amazon.com). Closes #1663 --- modules/amazon/__init__.py | 23 +++ modules/amazon/browser.py | 111 ++++++++++++ modules/amazon/favicon.png | Bin 0 -> 2231 bytes modules/amazon/module.py | 60 +++++++ modules/amazon/pages.py | 336 +++++++++++++++++++++++++++++++++++++ modules/amazon/test.py | 33 ++++ 6 files changed, 563 insertions(+) create mode 100644 modules/amazon/__init__.py create mode 100644 modules/amazon/browser.py create mode 100644 modules/amazon/favicon.png create mode 100644 modules/amazon/module.py create mode 100644 modules/amazon/pages.py create mode 100644 modules/amazon/test.py diff --git a/modules/amazon/__init__.py b/modules/amazon/__init__.py new file mode 100644 index 00000000..2032e091 --- /dev/null +++ b/modules/amazon/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Oleg Plakhotniuk +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
from weboob.browser import LoginBrowser, URL, need_login
from weboob.browser.exceptions import ServerError, HTTPNotFound
from weboob.capabilities.base import Currency
from weboob.capabilities.shop import OrderNotFound
from weboob.exceptions import BrowserIncorrectPassword

from .pages import HomePage, LoginPage, AmazonPage, HistoryPage, \
    OrderOldPage, OrderNewPage


__all__ = ['Amazon']


class Amazon(LoginBrowser):
    """
    Browser for the order history of www.amazon.com.

    Maps the site's URLs to page classes and exposes orders, payments and
    items read from the history and order detail pages.
    """
    BASEURL = 'https://www.amazon.com'
    home = URL(r'http://www\.amazon\.com/$', HomePage)
    login = URL(r'/ap/signin/.*$', LoginPage)
    history = URL(r'/gp/css/order-history.*$', HistoryPage)
    # Both order URLs share the '/gp/css/summary' pattern; the old and new
    # page classes each declare an ``is_here`` XPath to tell them apart.
    order_old = URL(r'/gp/css/summary.*$',
                    r'/gp/css/summary/edit.html\?orderID=%\(order_id\)s',
                    OrderOldPage)
    order_new = URL(r'/gp/css/summary.*$',
                    r'/gp/your-account/order-details.*$',
                    r'/gp/your-account/order-details\?orderID=%\(order_id\)s',
                    OrderNewPage)
    unknown = URL(r'/.*$', AmazonPage)

    def get_currency(self):
        """
        Return the currency object for the dollar sign.
        """
        # Amazon uses only U.S. dollars.
        return Currency.get_currency(u'$')

    def get_order(self, id_):
        """
        Return the order with the given id.

        Raises OrderNotFound when the order page yields no order (its
        ``order()`` returns a false value, e.g. for orders not yet shipped).
        """
        order = self.to_order(id_).order()
        if not order:
            raise OrderNotFound()
        return order

    def iter_orders(self):
        """
        Yield every reportable order, walking the history year by year.
        """
        root = self.to_history()
        for year_page in root.iter_years():
            for order_page in year_page.iter_orders():
                order = order_page.order()
                if order:
                    yield order

    def iter_payments(self, order):
        """
        Return the payments listed on the page of the given order.
        """
        return self.to_order(order.id).payments()

    def iter_items(self, order):
        """
        Return the items listed on the page of the given order.
        """
        return self.to_order(order.id).items()

    @need_login
    def to_history(self):
        """
        Go to the order history (unless already there) and return its page.
        """
        self.history.stay_or_go()
        assert self.history.is_here()
        return self.page

    def _on_order(self, order_id):
        """
        Tell whether the current page is an order page showing order_id.
        """
        return (self.order_new.is_here() or self.order_old.is_here()) \
            and self.page.order_number() == order_id

    @need_login
    def to_order(self, order_id):
        """
        Go to the page of the given order and return it.

        Amazon updates its website in stages: they reroute a random part of
        their users to new pages, and the rest to old ones. The new URL is
        tried first, the old one is used when the new one is not found.

        Raises OrderNotFound when neither attempt lands on that order.
        """
        if not self._on_order(order_id):
            try:
                self.order_new.go(order_id=order_id)
            except HTTPNotFound:
                self.order_old.go(order_id=order_id)
            if not self._on_order(order_id):
                raise OrderNotFound()
        return self.page

    def do_login(self):
        """
        Sign in from a clean cookie jar.

        Raises BrowserIncorrectPassword when the resulting page does not
        report a logged-in state.
        """
        self.session.cookies.clear()
        self.home.go().to_login().login(self.username, self.password)
        if not self.page.logged:
            raise BrowserIncorrectPassword()

    def location(self, *args, **kwargs):
        """
        Amazon throws 500 HTTP status code for apparently valid requests
        from time to time. Requests eventually succeed after retrying.

        The request is attempted up to MAX_RETRIES times (at least once);
        the last ServerError is re-raised when every attempt fails.
        """
        # NOTE(review): MAX_RETRIES is not defined in this class; it is
        # presumably inherited from the weboob browser base class - confirm.
        last_error = None
        for _ in range(max(1, self.MAX_RETRIES)):
            try:
                return super(Amazon, self).location(*args, **kwargs)
            except ServerError as error:
                # Keep the error in a name of our own: the ``as`` target is
                # not reliably bound after the loop.
                last_error = error
        raise last_error
z#?6xW`vz^6TxALt(7J2Sqcs_KILa(#2Osf}$nHhiixUCv1N@8$NQQ$u?Smum7HK-s zcFJinL%pFX!lndHvS~GC+y8s*N%Ub3**Ja~jb~8WzfJ>JFrbg#xBx-8-CR(`T0Y z4r5#Eyv&02->QsS0`Ud7jHIeXTp;>O2Djok`>vBNDLBP+8H26~<`;D-GOgHBH9OD{ z$=%&71woRjdd*h+_1LdYv8)p4Wp<0989dP~X(@v4QXf6Kjmox_l}ek34W5R?cr~&u z?B4a`zdXeOKi{{BVK7xoL~~ z>)QCcqW#@eUHsh62~dE^!)4@QG77gW<>9Id=cytCQ&fe)Xpmvd{|J11UA^3c|Gyvw zi61%_Kz?7a@OAeOMEkh`2v=WcHz7kGw1=Cy8`?D(`_b+9Hvt&xnjovSou2#$D`gvN literal 0 HcmV?d00001 diff --git a/modules/amazon/module.py b/modules/amazon/module.py new file mode 100644 index 00000000..62717937 --- /dev/null +++ b/modules/amazon/module.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Oleg Plakhotniuk +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
from weboob.capabilities.shop import CapShop
from weboob.tools.backend import Module, BackendConfig
from weboob.tools.value import ValueBackendPassword

from .browser import Amazon


__all__ = ['AmazonModule']


class AmazonModule(Module, CapShop):
    """
    Shop backend for Amazon.

    Every capability method forwards to the Amazon browser instance.
    """
    NAME = 'amazon'
    MAINTAINER = u'Oleg Plakhotniuk'
    EMAIL = 'olegus8@gmail.com'
    VERSION = '1.1'
    LICENSE = 'AGPLv3+'
    DESCRIPTION = u'Amazon'
    # The account e-mail is labelled "Username" and is not masked on input.
    CONFIG = BackendConfig(
        ValueBackendPassword('email', label='Username', masked=False),
        ValueBackendPassword('password', label='Password'))
    BROWSER = Amazon

    def create_default_browser(self):
        """
        Build the browser from the configured e-mail and password.
        """
        email = self.config['email'].get()
        password = self.config['password'].get()
        return self.create_browser(email, password)

    def get_currency(self):
        """
        Return the currency reported by the browser.
        """
        return self.browser.get_currency()

    def get_order(self, id_):
        """
        Return the order with the given id, as found by the browser.
        """
        return self.browser.get_order(id_)

    def iter_orders(self):
        """
        Return the browser's iterator over orders.
        """
        return self.browser.iter_orders()

    def iter_payments(self, order):
        """
        Return the browser's payments for the given order.
        """
        return self.browser.iter_payments(order)

    def iter_items(self, order):
        """
        Return the browser's items for the given order.
        """
        return self.browser.iter_items(order)
from weboob.tools.capabilities.bank.transactions import \
    AmericanTransaction as AmTr
from weboob.capabilities.shop import Order, Payment, Item
from weboob.browser.pages import HTMLPage, pagination, NextPage

from datetime import datetime
from decimal import Decimal
import re


class AmazonPage(HTMLPage):
    """
    Base class of all Amazon pages.
    """

    @property
    def logged(self):
        """
        True when the document contains a "Sign Out" text.
        """
        return bool(self.doc.xpath(u'//*[contains(text(),"Sign Out")]'))


class HomePage(AmazonPage):
    """
    Landing page; only used to reach the sign-in form.
    """

    def to_login(self):
        """
        Follow the "your account" link and return the resulting page.

        Two link ids are looked up; the first one found is used.
        """
        url1 = self.doc.xpath('//a[@id="nav-link-yourAccount"]/@href')
        url2 = self.doc.xpath('//a[@id="nav-your-account"]/@href')
        self.browser.location((url1 or url2)[0])
        return self.browser.page


class LoginPage(AmazonPage):
    """
    Sign-in form.
    """

    def login(self, email, password):
        """
        Fill the "signIn" form with the credentials and submit it.
        """
        form = self.get_form(name='signIn')
        form['email'] = email
        form['password'] = password
        form.submit()


class HistoryPage(AmazonPage):
    """
    Order history, filtered by a time period chosen in a form.
    """

    def iter_years(self):
        """
        Yield the history page of each year offered by the period filter.
        """
        for year in self.opt_years():
            yield self.to_year(year)

    @pagination
    def iter_orders(self):
        """
        Yield the order page of every order number listed here, then ask
        for the "Next" page when the pagination bar has one.
        """
        for id_ in self.doc.xpath(
                u'//span[contains(text(),"Order #")]/../span[2]/text()'):
            yield self.browser.to_order(id_.strip())
        for next_ in self.doc.xpath(u'//ul[@class="a-pagination"]'
                                    u'//a[contains(text(),"Next")]/@href'):
            raise NextPage(next_)

    def to_year(self, year):
        """
        Submit the period form with the given filter value and return the
        page the browser ends up on.
        """
        form = self.get_form('//form[contains(@class,"time-period-chooser")]')
        form['orderFilter'] = [year]
        form.submit()
        return self.browser.page

    def opt_years(self):
        """
        Return the filter values that start with "year-".
        """
        return [x for x in self.doc.xpath(
            '//select[@name="orderFilter"]/option/@value'
        ) if x.startswith('year-')]


class OrderNewPage(AmazonPage):
    """
    Order details in the new page layout.
    """
    is_here = u'//*[contains(text(),"Ordered on")]'

    def order(self):
        """
        Return the Order shown on this page, or None when it has a pending
        shipping status.
        """
        # Reports only fully shipped and delivered orders, because they have
        # finalized payment amounts.
        # Payment for not yet shipped orders may change, and is not always
        # available.
        for s in [u'Not Yet Shipped', u'Preparing for Shipment',
                  u'Shipping now']:
            if self.doc.xpath(u'//*[contains(text(),"%s")]' % s):
                return None

        order = Order(id=self.order_number())
        order.date = self.order_date()
        order.tax = self.tax()
        order.discount = self.discount()
        order.shipping = self.shipping()
        return order

    def order_date(self):
        """
        Parse the date that follows "Ordered on" in the header text.
        """
        return datetime.strptime(
            re.match('.*Ordered on ([^ ]+ [0-9]+, [0-9]+) .*',
                     self.date_num()).group(1),
            '%B %d, %Y')

    def order_number(self):
        """
        Return the token that follows "Order# " in the header text, or None
        when the header does not contain one.
        """
        m = re.match('.*Order# ([^ ]+) .*', self.date_num())
        if m:
            return m.group(1)

    def payments(self):
        """
        Yield a negative "GIFT CARD" payment when a gift card amount is
        present, followed by the listed transactions.
        """
        gift = self.gift()
        if gift:
            pmt = Payment()
            pmt.date = self.order_date()
            pmt.method = u'GIFT CARD'
            pmt.amount = -gift
            yield pmt
        for trans in self.transactions():
            yield trans

    def date_num(self):
        """
        Return the text of the "Ordered on" block joined on one line.
        """
        return u' '.join(self.doc.xpath(
            '//div[contains(text(),"Ordered on")]/text()')).replace('\n', '')

    def tax(self):
        return self.amount(u'Estimated tax to be collected')

    def shipping(self):
        return self.amount(u'Free shipping', u'Free Shipping',
                           u'Shipping & Handling')

    def discount(self):
        return self.amount(u'Promotion applied', u'Promotion Applied',
                           u'Subscribe & Save', u'Your Coupon Savings')

    def gift(self):
        return self.amount(u'Gift Card Amount')

    def amount(self, *names):
        """
        Sum the amounts found next to every "<name>:" label.
        """
        return Decimal(sum(AmTr.decimal_amount(amount.strip())
                       for n in names for amount in self.doc.xpath(
            '//span[contains(text(),"%s:")]/../..//span[2]/text()' % n)))

    def transactions(self):
        """
        Yield one Payment per "Items shipped:" row of the Transactions
        section.
        """
        for row in self.doc.xpath('//span[contains(text(),"Transactions")]'
                                  '/../../div/div'):
            text = row.text_content().strip().replace('\n', ' ')
            if u'Items shipped:' not in text:
                continue
            # [A-Za-z], not [A-z]: the latter also matches the punctuation
            # that sits between "Z" and "a" in ASCII.
            date, method, amount = re.match(
                '.* '     '([A-Za-z]+ [0-9]+, [0-9]+)'
                '[ -]+'   '([A-Za-z][^:]+)'
                ': +'     '([^ ]+)', text).groups()
            date = datetime.strptime(date, '%B %d, %Y')
            method = method.replace(u'ending in ', u'').upper()
            amount = AmTr.decimal_amount(amount)
            pmt = Payment()
            pmt.date = date
            pmt.method = method
            pmt.amount = amount
            yield pmt

    def items(self):
        """
        Yield one Item per product box that has both a label and a price.

        A label of the form "<n> of <label>" multiplies the price by n.
        """
        for item in self.doc.xpath('//div[contains(@class,"a-box shipment")]'
                                   '/div/div/div/div/div/div'):
            url = (item.xpath(u'*//a[contains(@href,"/gp/product")]/@href') +
                   [u''])[0]
            label = u''.join(item.xpath(
                '*//a[contains(@href,"/gp/product")]/text()')).strip()
            price = u''.join(x.strip() for x in item.xpath(
                '*//span[contains(text(),"$")]/text()')
                if x.strip().startswith('$'))
            price = AmTr.decimal_amount(price)
            multi = re.match(u'([0-9]+) of (.*)', label)
            if multi:
                amount, label = multi.groups()
                price *= Decimal(amount)
            if url:
                # Keep only the product path, dropping the "/ref=" suffix.
                url = unicode(self.browser.BASEURL) + \
                    re.match(u'(/gp/product/.*)/ref=.*', url).group(1)
            if label and price:
                itm = Item()
                itm.label = label
                itm.url = url
                itm.price = price
                yield itm


class OrderOldPage(AmazonPage):
    """
    Order summary in the old page layout.
    """
    is_here = u'//*[contains(text(),"Amazon.com order number")]'

    def order(self):
        """
        Return the Order shown on this page, or None when it has a pending
        shipping status.
        """
        # Reports only fully shipped and delivered orders, because they have
        # finalized payment amounts.
        # Payment for not yet shipped orders may change, and is not always
        # available.
        for s in [u'Not Yet Shipped', u'Preparing for Shipment',
                  u'Shipping now']:
            if self.doc.xpath(u'//b[contains(text(),"%s")]' % s):
                return None

        order = Order(id=self.order_number())
        order.date = self.order_date()
        order.tax = self.tax()
        order.discount = self.discount()
        order.shipping = self.shipping()
        return order

    def order_date(self):
        """
        Parse the text next to the "Order Placed" label as a date.
        """
        return datetime.strptime(u' '.join(self.doc.xpath(
            u'//b[contains(text(),"Order Placed")]/../text()')).strip(),
            '%B %d, %Y')

    def order_number(self):
        """
        Return the text next to the "Amazon.com order number" label.
        """
        return u' '.join(self.doc.xpath(
            u'//td/b[contains(text(),"Amazon.com order number")]/../text()')
        ).strip()

    def tax(self):
        return self.sum_amounts(u'Sales Tax:')

    def discount(self):
        return self.sum_amounts(u'Subscribe & Save:', u'Promotion applied:',
                                u'Promotion Applied:', u'Your Coupon Savings:')

    def shipping(self):
        return self.sum_amounts(u'Shipping & Handling:', u'Free shipping:',
                                u'Free Shipping:')

    def payments(self):
        """
        Yield a negative "GIFT CARD" payment per shipment that used one,
        then the credit card transactions. When no transaction is listed,
        yield a single payment of the grand total for the first payment
        method found.
        """
        for shmt in self.shipments():
            gift = self.gift(shmt)
            if gift:
                pmt = Payment()
                pmt.date = self.order_date()
                pmt.method = u'GIFT CARD'
                pmt.amount = -gift
                yield pmt
        transactions = list(self.transactions())
        if transactions:
            for t in transactions:
                yield t
        else:
            for method in self.paymethods():
                pmt = Payment()
                pmt.date = self.order_date()
                pmt.method = method
                pmt.amount = self.grand_total()
                yield pmt
                break

    def shipments(self):
        """
        Yield the <b> heading node of every shipment section.
        """
        for cue in (u'Shipment #', u'Subscribe and Save Shipment'):
            for shmt in self.doc.xpath('//b[contains(text(),"%s")]' % cue):
                yield shmt

    def items(self):
        """
        Yield one Item per row of each shipment's "Items Ordered" table.

        The quantity comes from the "<n> of:" text of the row; the price is
        the quantity times the amount in the last div of the row.
        """
        for shmt in self.shipments():
            root = shmt.xpath(u'../../../../../../../..'
                              u'//b[text()="Items Ordered"]')[0]
            # The first row is skipped.
            for item in root.xpath('../../../tr')[1:]:
                count = url = label = None
                for div in item.xpath('*//div'):
                    # A div's text may be None (no leading text node), which
                    # re.match would reject with a TypeError.
                    m = re.match(r'^\s*(\d+)\s*of:(.*)$', div.text or u'',
                                 re.MULTILINE | re.DOTALL)
                    if not m:
                        continue
                    count = Decimal(m.group(1).strip())
                    label = unicode(m.group(2).strip())
                    if label:
                        url = u''
                    else:
                        # No inline label: take label and URL from the
                        # product link inside the div.
                        a = div.xpath('*//a[contains(@href,"/gp/product")]')[0]
                        url = unicode(a.attrib['href'])
                        label = unicode(a.text.strip())
                price1 = item.xpath('*//div')[-1].text.strip()
                price = count * AmTr.decimal_amount(price1)

                itm = Item()
                itm.label = label
                itm.url = url
                itm.price = price
                yield itm

    def sum_amounts(self, *names):
        """
        Sum the given labelled amounts over all shipments.

        Always returns a Decimal, including when there are no shipments.
        """
        return sum((self.amount(shmt, x) for shmt in self.shipments()
                    for x in names), Decimal(0))

    def amount(self, shmt, name):
        """
        Return the amount labelled ``name`` in the totals table of the
        given shipment, or Decimal(0) when the label is absent.

        The label is looked up both as plain cell text and as bold text.
        """
        for root in shmt.xpath(u'../../../../../../../..'
                               u'//td[text()="Item(s) Subtotal: "]/../..'):
            for node in root.xpath(u'tr/td[text()="%s"]' % name):
                return AmTr.decimal_amount(
                    node.xpath('../td')[-1].text.strip())
            for node in root.xpath(u'tr/td/b[text()="%s"]' % name):
                return AmTr.decimal_amount(
                    node.xpath('../../td/b')[-1].text.strip())
        return Decimal(0)

    def gift(self, shmt):
        return self.amount(shmt, u'Gift Card Amount:')

    def paymethods(self):
        """
        Yield the upper-cased "<name> <last digits>" of each payment method.

        The text of the payment block is consumed from the left, one
        pattern at a time; scanning stops at the first text no pattern
        recognizes.
        """
        root = self.doc.xpath('//b[text()="Payment Method: "]/..')[0]
        text = root.text_content().strip()
        while text:
            for pattern in [
                    u'^.*Payment Method:',
                    u'^([^\n]+)\n +\| Last digits: +([0-9]+)\n',
                    u'^Gift Card\n',  # Skip gift card.
                    u'^Billing address.*$']:
                match = re.match(pattern, text, re.DOTALL | re.MULTILINE)
                if match:
                    text = text[match.end():].strip()
                    if match.groups():
                        yield u' '.join(match.groups()).upper()
                    break
            else:
                break

    def transactions(self):
        """
        Yield one Payment per row of the "Credit Card transactions" table.
        """
        for tr in self.doc.xpath(
                u'//div[contains(b,"Credit Card transactions")]'
                u'/following-sibling::table[1]/tr'):
            # The first cell holds "<label><nbsp><date>:".
            label, date = tr.xpath('td[1]/text()')[0].strip().split(u'\xa0')
            amount = tr.xpath('td[2]/text()')[0].strip()
            date = datetime.strptime(date, '%B %d, %Y:')
            # [:-1] drops the last character of the label.
            method = label.replace(u'ending in ', u'')[:-1].upper()
            amount = AmTr.decimal_amount(amount)
            pmt = Payment()
            pmt.date = date
            pmt.method = method
            pmt.amount = amount
            yield pmt

    def grand_total(self):
        """
        Return the amount next to the "Grand Total:" label.
        """
        return AmTr.decimal_amount(self.doc.xpath(
            '//td[b="Grand Total:"]/following-sibling::td[1]/b')[0].text)
+ """ + b = self.backend + items = (i for o in b.iter_orders() for i in b.iter_items(o)) + item = next(items, None) + self.assertNotEqual(item, None)