boobot: Remove punctuation at the end when 404
This commit is contained in:
parent
2b7761fe9b
commit
bd28eca0f6
1 changed file with 7 additions and 4 deletions
|
|
@@ -35,7 +35,7 @@ from mechanize import _headersutil as headersutil
|
||||||
from mechanize._html import EncodingFinder
|
from mechanize._html import EncodingFinder
|
||||||
|
|
||||||
from weboob.core import Weboob
|
from weboob.core import Weboob
|
||||||
from weboob.tools.browser import StandardBrowser, BrowserUnavailable, BrowserHTTPError
|
from weboob.tools.browser import StandardBrowser, BrowserUnavailable
|
||||||
from weboob.tools.misc import get_backtrace
|
from weboob.tools.misc import get_backtrace
|
||||||
from weboob.tools.misc import to_unicode
|
from weboob.tools.misc import to_unicode
|
||||||
from weboob.tools.storage import StandardStorage
|
from weboob.tools.storage import StandardStorage
|
||||||
|
|
@@ -90,16 +90,19 @@ class BoobotBrowser(StandardBrowser):
|
||||||
ENCODING = None
|
ENCODING = None
|
||||||
DEFAULT_TIMEOUT = 3
|
DEFAULT_TIMEOUT = 3
|
||||||
|
|
||||||
def urlinfo(self, url):
|
def urlinfo(self, url, maxback=2):
|
||||||
if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
|
if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
|
||||||
url = url.replace('mobile.twitter.com', 'twitter.com', 1)
|
url = url.replace('mobile.twitter.com', 'twitter.com', 1)
|
||||||
try:
|
try:
|
||||||
r = self.openurl(HeadRequest(url), _tries=2, _delay=0.2)
|
r = self.openurl(HeadRequest(url), _tries=2, _delay=0.2)
|
||||||
body = False
|
body = False
|
||||||
except BrowserHTTPError as e:
|
except BrowserUnavailable as e:
|
||||||
if 'HTTP Error 501' in unicode(e) or 'HTTP Error 405' in unicode(e):
|
if u'HTTP Error 501' in unicode(e) or u'HTTP Error 405' in unicode(e):
|
||||||
r = self.openurl(url, _tries=2, _delay=0.2)
|
r = self.openurl(url, _tries=2, _delay=0.2)
|
||||||
body = True
|
body = True
|
||||||
|
elif u'HTTP Error 404' in unicode(e) \
|
||||||
|
and maxback and not url[-1].isalnum():
|
||||||
|
return self.urlinfo(url[:-1], maxback-1)
|
||||||
else:
|
else:
|
||||||
raise e
|
raise e
|
||||||
headers = r.info()
|
headers = r.info()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue