Fix duplicate photos produced by the hidden photos finder

This commit is contained in:
Romain Bignon 2010-11-26 17:46:21 +01:00
commit fb01c73028

View file

@ -273,13 +273,6 @@ class ProfilePage(PageBase):
if m:
self.photos.append(dict(url=re.sub(u'thumb[0-2]_', u'image', div.getAttribute('background')),
hidden=False))
photo_regex = re.compile('(?P<base_url>http://.+\.adopteunmec\.com/.+/)image(?P<id>.+)\.jpg')
photo_max_id = max(int(photo_regex.match(photo['url']).groupdict()['id']) for photo in self.photos)
base_url = photo_regex.match(self.photos[0]['url']).groupdict()['base_url']
for id in xrange(1, photo_max_id + 1):
url = '%simage%s.jpg' % (base_url, id)
if not url in [photo['url'] for photo in self.photos]:
self.photos.append(dict(url=url, hidden=True))
if div.hasAttribute('width') and str(div.getAttribute('width')) == '226':
trs = div.getElementsByTagName('tr')
for tr in trs:
@ -317,6 +310,17 @@ class ProfilePage(PageBase):
if self.id:
break
if self.photos:
    # Find hidden photos: for every id from 1 up to the highest id seen
    # among the collected photo URLs, rebuild the URL from the first
    # photo's base URL and record it as hidden when it is not already listed.
    # Nothing to do when no photo was collected (max() would fail on an
    # empty sequence).
    photo_regex = re.compile(r'(?P<base_url>http://.+\.adopteunmec\.com/.+/)image(?P<id>.+)\.jpg')
    photo_max_id = max(int(photo_regex.match(photo['url']).group('id')) for photo in self.photos)
    base_url = photo_regex.match(self.photos[0]['url']).group('base_url')
    # Snapshot of the URLs already known; generated URLs are unique per id,
    # so the snapshot does not need updating inside the loop.
    known_urls = set(photo['url'] for photo in self.photos)
    for photo_id in xrange(1, photo_max_id + 1):
        # The candidate URL must be built before it is tested; without this
        # assignment `url` is undefined in the loop.
        url = '%simage%s.jpg' % (base_url, photo_id)
        if url not in known_urls:
            self.photos.append(dict(url=url, hidden=True))
def parse_description(self, div):
# look for description