445 lines
18 KiB
Python
445 lines
18 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright(C) 2008-2011 Romain Bignon
|
|
#
|
|
# This file is part of weboob.
|
|
#
|
|
# weboob is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# weboob is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
from weboob.backends.aum.pages.base import PageBase
|
|
from weboob.tools.ordereddict import OrderedDict
|
|
|
|
from copy import deepcopy
|
|
from logging import warning
|
|
import re
|
|
|
|
class FieldBase(object):
    """Base class of the converters that store one scraped profile value.

    Each subclass implements ``put_value()`` to parse the raw text found on
    the page and write the result into a destination dictionary.

    Declared as a new-style class so that instances behave the same way on
    every Python version (``type(obj)`` is the class, ``super()`` works).
    """

    def __init__(self, key):
        """Remember ``key``, the dictionary entry this field writes to."""
        self.key = key

    def put_value(self, d, value):
        """Parse ``value`` and store the result into the dictionary ``d``.

        Must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
|
|
|
|
class FieldString(FieldBase):
    """Field whose value is kept as one unicode string."""

    def put_value(self, d, value):
        """Convert ``value`` with ``unicode()`` and store it at ``d[self.key]``."""
        text = unicode(value)
        d[self.key] = text
|
|
|
|
class FieldList(FieldBase):
    """Field whose raw text is a ``', '``-separated enumeration."""

    def put_value(self, d, value):
        """Split ``value`` on ``', '`` and store the list at ``d[self.key]``.

        Any previous content of ``d[self.key]`` is replaced.
        """
        items = value.split(', ')
        d[self.key] = items
|
|
|
|
class FieldWideList(FieldBase):
    """Field whose items arrive one at a time, over several calls."""

    def put_value(self, d, value):
        """Add ``value`` as one more item of the sequence at ``d[self.key]``.

        ``d[self.key]`` must already exist; the in-place ``+=`` keeps the
        existing list object instead of building a new one.
        """
        item = [value]
        d[self.key] += item
|
|
|
|
class FieldOld(FieldBase):
    """Field parsing an age, optionally followed by a birth date.

    The expected text is ``<age> ans``, optionally followed by a
    parenthesised birth date made of a day number, a French month name and a
    year.  The age is stored at ``d[self.key]`` and, when the date is present
    and its month is known, ``d['birthday']`` receives a
    ``(day, month, year)`` tuple of integers.
    """

    # Groups: 1 = age, 2 = whole optional date part, 3 = day, 4 = month name,
    # 5 = year.  Backslashes are escaped explicitly so the literal is valid
    # as a plain unicode string.
    regexp = re.compile(u'([0-9]+) ans( \\(Née le ([0-9]+) ([^ ]+) ([0-9]+)\\))?')
    # The index of a month name in this list is its month number; the empty
    # first entry makes the numbering start at 1.
    month2i = ['', 'janvier', u'février', 'mars', 'avril', 'mai', 'juin', 'juillet', u'août', 'septembre', 'octobre', 'novembre', u'décembre']

    def put_value(self, d, value):
        """Store the age (and the birthday when available) found in ``value``.

        Nothing is written when ``value`` does not start with an age.  An
        unknown month name is reported through ``logging.warning`` and leaves
        ``d['birthday']`` untouched.
        """
        m = self.regexp.match(value)
        if not m:
            return

        d[self.key] = int(m.group(1))
        if not m.group(2):
            # No birth date was given after the age.
            return

        try:
            d['birthday'] = (int(m.group(3)),
                             self.month2i.index(m.group(4)),
                             int(m.group(5)))
        except ValueError as e:
            # Raised by list.index() when the month name is not in month2i.
            # Report it through logging rather than printing to stdout.
            warning('Unable to parse the birthday in "%s": %s', value, e)
|
|
|
|
class FieldLocation(FieldBase):
    """Field parsing a ``<place> (<zipcode>), <country>`` location string.

    The five-digit zipcode part is optional.  Results are written to the
    ``'location'``, ``'zipcode'`` and ``'country'`` entries of the
    destination dictionary; ``self.key`` is not used.
    """

    location = re.compile(r'(?P<location>.+?)( \((?P<zipcode>[0-9]{5})\))?, (?P<country>.*)')

    def __init__(self):
        FieldBase.__init__(self, '')

    def put_value(self, d, value):
        """Split ``value`` into location, zipcode and country entries of ``d``.

        When ``value`` does not match the expected layout, a warning is
        logged and the whole text is stored as ``d['location']``.
        """
        # TODO: determine distance, or something like
        m = self.location.match(value)
        if not m:
            warning('Unable to parse the location "%s"' % value)
            d['location'] = unicode(value)
            return

        for field in ('country', 'location', 'zipcode'):
            parsed = m.group(field)
            # The zipcode group is optional: when it did not take part in the
            # match, keep whatever value is already stored in ``d`` instead
            # of overwriting it with None.
            if parsed is not None:
                d[field] = parsed
|
|
|
|
class FieldMeasurements(FieldBase):
    """Field parsing a ``', '``-separated list of body measurements.

    Tokens of the form ``<n> cm`` and ``<n> kg`` are stored as integers in
    ``d['height']`` and ``d['weight']``.  When both entries are non-zero,
    ``d['BMI']`` and a textual category in ``d['fat']`` are derived from
    them.  ``self.key`` is not used.
    """

    height = re.compile('([0-9]{1,3}) cm')
    weight = re.compile('([0-9]{1,3}) kg')
    # TODO: parse third parameter

    def __init__(self):
        FieldBase.__init__(self, '')

    def put_value(self, d, value):
        """Fill the height/weight entries of ``d`` and the derived BMI ones."""
        patterns = ((self.height, 'height'), (self.weight, 'weight'))
        for token in value.split(', '):
            # A token is tried against the height pattern first, then weight.
            for pattern, entry in patterns:
                found = pattern.match(token)
                if found:
                    d[entry] = int(found.group(1))
                    break

        if not (d['height'] and d['weight']):
            return

        # Weight in kg divided by the square of the height in metres.
        metres = d['height']/100.0
        bmi = (d['weight']/float(pow(metres, 2)))

        # Upper bounds (exclusive) of each category, in increasing order.
        categories = ((15.5, 'severely underweight'),
                      (18.4, 'underweight'),
                      (24.9, 'normal'),
                      (30, 'overweight'))
        for upper_bound, category in categories:
            if bmi < upper_bound:
                d['fat'] = category
                break
        else:
            d['fat'] = 'obese'
        d['BMI'] = bmi
|
|
|
|
class FieldParticularSignes(FieldBase):
    """Field turning a list of distinguishing marks into boolean flags.

    ``self.key`` is not used: each recognised keyword sets its own entry of
    the destination dictionary to True.
    """

    def __init__(self):
        FieldBase.__init__(self, '')

    def put_value(self, d, value):
        """Set the flag of ``d`` matching each ``', '``-separated item.

        Only the first keyword found in an item is taken into account;
        items without any known keyword are ignored.
        """
        # (keyword looked for in the item, entry of ``d`` to switch on),
        # checked in this order.
        keywords = (('tatouages', 'tatoos'),
                    ('piercing', 'piercing'),
                    ('lunettes', 'glasses'),
                    ('rousseur', 'freckle'))
        for sign in value.split(', '):
            for keyword, flag in keywords:
                if keyword in sign:
                    d[flag] = True
                    break
|
|
|
|
class ProfilePage(PageBase):
    """Parser of a member profile page.

    ``on_loaded()`` walks ``self.document`` (a DOM offering
    ``getElementsByTagName``) and fills these attributes:

    * ``name``, ``status``, ``description``: texts found on the page;
    * ``id``: integer taken from an ``onclick`` handler, 0 when not found;
    * ``photos``: list of ``{'url': ..., 'hidden': bool}`` dictionaries;
    * ``stats``: ordered dictionary of integer counters;
    * ``table``: ordered dictionary of sections (see ``empty_table``).
    """

    # Template of ``self.table``.  It is deep-copied for each page so that
    # the lists it contains are never shared between instances.
    empty_table = OrderedDict((
        ('details', OrderedDict((
            ('old', 0),
            ('birthday', (0, 0, 0)),
            ('zipcode', 0),
            ('location', ''),
            ('country', ''),
            ('eyes', ''),
            ('hairs', []),
            ('height', 0),
            ('weight', 0),
            ('BMI', 0),
            ('fat', ''),
            ('from', ''),
            ('tatoos', False),
            ('piercing', False),
            ('freckle', False),
            ('glasses', False),
            ('job', ''),
            ('style', ''),
            ('alimentation', ''),
            ('alcool', ''),
            ('tabac', ''),
        ))),
        ('liking', OrderedDict((
            ('activities', ''),
            ('music', []),
            ('cinema', []),
            ('books', []),
            ('tv', []),
        ))),
        ('sex', OrderedDict((
            ('underwear', []),
            ('top', []),
            ('bottom', []),
            ('interval', ''),
            ('favorite', []),
            ('practices', []),
            ('toys', []),
        ))),
        ('personality', OrderedDict((
            ('snap', ''),
            ('exciting', ''),
            ('hate', ''),
            ('vices', ''),
            ('assets', ''),
            ('fantasies', ''),
            ('is', []),
        ))),
    ))

    # id attribute of a tab <div> -> section name in ``empty_table``.
    tables = {'tab_0': 'details',
              'tab_1': 'liking',
              'tab_2': 'sex',
              'tab_3': 'personality'
             }

    # Per section: label displayed on the page -> field storing its value.
    # The labels (trailing spaces included) must match the page text exactly.
    fields = {'details': {'Age': FieldOld('old'),
                          u'Réside à': FieldLocation(),
                          'Yeux': FieldString('eyes'),
                          'Cheveux ': FieldList('hairs'),
                          'Mensurations ': FieldMeasurements(),
                          'Origines ': FieldString('from'),
                          'Signes particuliers ': FieldParticularSignes(),
                          'Style ': FieldString('style'),
                          'Profession ': FieldString('job'),
                          'Alimentation': FieldString('alimentation'),
                          'Alcool': FieldString('alcool'),
                          'Tabac': FieldString('tabac'),
                         },
              'liking': {'Hobbies ': FieldString('activities'),
                         'Musique ': FieldWideList('music'),
                         u'Cinéma': FieldWideList('cinema'),
                         'Livres ': FieldWideList('books'),
                         u'Télé': FieldWideList('tv'),
                        },
              'sex': {u'Sous-v\xeatements ': FieldList('underwear'),
                      '... en haut ': FieldList('top'),
                      '... en bas ': FieldList('bottom'),
                      u'Fréquence idéale des rapports sexuels ':
                          FieldString('interval'),
                      u'Position favorite ': FieldList('favorite'),
                      'Pratiques sexuelles ': FieldList('practices'),
                      u'Accessoires préférés ':FieldList('toys'),
                      u'Ce qui se cache en dessous ':
                          FieldList('underwear'),
                      u"Ce qui m'\xe9moustille ":
                          FieldList('favorite'),
                      u"Au lit j'aime ": FieldList('practices'),
                      u'Mes accessoires ': FieldList('toys'),
                     },
              'personality': {u'Ça la fait craquer ': FieldString('snap'),
                              u'Ça l\'excite ': FieldString('exciting'),
                              u'Elle déteste ': FieldString('hate'),
                              'Ses vices ': FieldString('vices'),
                              'Ses atouts ': FieldString('assets'),
                              'Ses fantasmes ': FieldString('fantasies'),
                              'Elle est ': FieldList('is'),
                             },
             }

    # Matches the onclick handlers carrying the profile id (group 2).
    ID_REGEXP = re.compile(r"(charm|addBasket|openAlbum)\(([0-9]+)(,[\s'\d]+)?\)")
    # Matches the background URLs that are photo thumbnails.
    PHOTO_REGEXP = re.compile(r'http://(s|p)([0-9]+)\.adopteunmec\.com/(.*)')
    # Splits a full-size photo URL into its directory and its number.
    FULL_PHOTO_REGEXP = re.compile(r'(?P<base_url>http://.+\.adopteunmec\.com/.+/)image(?P<id>.+)\.jpg')

    # Label of a statistics row -> key in ``self.stats``.
    STATS2ID = {'visites': 'visits',
                'charmes': 'charms',
                'paniers': 'baskets',
                'mails': 'mails',
                'POPULARIT': 'score',
               }
    # Group 1 is the leading run of digits and whitespace of a stats value.
    STATS_VALUE_REGEXP = re.compile(r'([0-9\s]+).*')

    def __repr__(self):
        if isinstance(self.name, unicode):
            name = self.name.encode('utf-8', 'backslashreplace')
        else:
            name = self.name
        return '<Profile name="%s">' % name

    def on_loaded(self):
        """Reset every attribute, then fill them from ``self.document``."""
        self.name = u''
        self.description = u''
        self.table = deepcopy(self.empty_table)
        self.id = 0
        self.photos = []
        self.status = ''
        self.stats = OrderedDict((
            ('score', 0),
            ('visits', 0),
            ('charms', 0),
            ('baskets', 0),
            ('mails', 0),
        ))

        for td in self.document.getElementsByTagName('td'):
            self._parse_header_cell(td)

        for div in self.document.getElementsByTagName('div'):
            if not div.hasAttribute('id'):
                continue
            div_id = div.getAttribute('id')
            if div_id == 'about_div':
                self.parse_description(div)
            if div_id.startswith('tab_'):
                self.parse_table(div)

        self._parse_profile_id()
        self._append_hidden_photos()

    def _parse_header_cell(self, td):
        """Extract name, status, a photo or the stats from one <td>."""
        if td.hasAttribute('style') and hasattr(td.firstChild, 'data'):
            # Both texts are recognised by the exact inline style of their
            # cell.
            style = td.getAttribute('style')
            if style == "color:#ffffff;font-size:32px;font-weight:bold;letter-spacing:-2px":
                self.name = td.firstChild.data
            elif style == "font-size:12px;font-weight:bold":
                self.status = td.firstChild.data

        if td.hasAttribute('background'):
            background = td.getAttribute('background')
            if self.PHOTO_REGEXP.match(background):
                # Turn the thumbnail file name into the full-size one.
                self.photos.append(dict(url=re.sub(u'thumb[0-2]_', u'image', background),
                                        hidden=False))

        # Compared directly rather than through str(), which could fail on a
        # non-ASCII attribute value.
        if td.hasAttribute('width') and td.getAttribute('width') == '226':
            self._parse_stats_rows(td)

    def _parse_stats_rows(self, td):
        """Read the counters of ``self.stats`` from the rows below ``td``."""
        for tr in td.getElementsByTagName('tr'):
            tds = tr.getElementsByTagName('td')
            if len(tds) > 2 and hasattr(tds[2].firstChild, 'data'):
                label = tds[0].firstChild.data
                value = tds[2].firstChild.data
            elif len(tds) == 2:
                label = unicode(tds[0].childNodes[1].data)
                value = tds[1].childNodes[1].data
            else:
                continue

            m = self.STATS_VALUE_REGEXP.match(value)
            if m and label in self.STATS2ID:
                # Drop every whitespace used as a digit separator; a value
                # made of whitespace only is skipped instead of breaking
                # int().
                digits = ''.join(m.group(1).split())
                if digits:
                    self.stats[self.STATS2ID[label]] = int(digits)

    def _parse_profile_id(self):
        """Set ``self.id`` from the first matching onclick handler."""
        for tag in ('img', 'td'):
            for node in self.document.getElementsByTagName(tag):
                if not node.hasAttribute('onclick'):
                    continue
                m = self.ID_REGEXP.match(node.getAttribute('onclick'))
                if m:
                    self.id = int(m.group(2))
                    break
            if self.id:
                break

    def _append_hidden_photos(self):
        """Append the photos whose number is below the highest visible one.

        Every ``image<N>.jpg`` URL with N from 1 to the highest number seen
        that is not already listed is added with ``hidden=True``.  Visible
        photos whose URL does not follow that pattern are ignored here
        rather than aborting the page load.
        """
        known_urls = set()
        base_url = None
        max_number = 0
        for photo in self.photos:
            known_urls.add(photo['url'])
            m = self.FULL_PHOTO_REGEXP.match(photo['url'])
            if not m:
                continue
            try:
                number = int(m.group('id'))
            except ValueError:
                continue
            if base_url is None:
                base_url = m.group('base_url')
            max_number = max(max_number, number)

        if base_url is None:
            return

        for number in xrange(1, max_number + 1):
            url = '%simage%s.jpg' % (base_url, number)
            if url not in known_urls:
                known_urls.add(url)
                self.photos.append(dict(url=url, hidden=True))

    def parse_description(self, div):
        """Set ``self.description`` from the children of ``div``.

        Newlines inside text nodes are dropped, ``<br>`` elements become
        newlines, and the leading text of ``<i>`` elements is kept.
        """
        parts = []
        for c in div.childNodes:
            tag = getattr(c, 'tagName', None)
            if hasattr(c, 'data'):
                parts.append(c.data.replace('\n', ''))
            elif tag == 'br':
                parts.append('\n')
            elif tag == 'i':
                # An empty <i> has no child to read the text from.
                if c.childNodes and hasattr(c.childNodes[0], 'data'):
                    parts.append(c.childNodes[0].data.replace('\n', ''))

        self.description = ''.join(parts).strip()

    @staticmethod
    def _text_of(nodes):
        """Concatenate the text nodes of ``nodes``, skipping lone nbsp."""
        return ''.join(node.data for node in nodes
                       if hasattr(node, 'data') and node.data != u'\xa0')

    def _lookup_field(self, fields, label):
        """Return the field registered for ``label``, or None with a warning."""
        field = fields.get(label)
        if field is None:
            self.logger.warning('Unable to find "%s" (%s)' % (label, repr(label)))
        return field

    def parse_table(self, div):
        """Fill one section of ``self.table`` from the tab ``div``.

        Each two-cell row of the second <table> of ``div`` is either a
        label/value pair, a pair of labels announcing two columns of
        values, or a pair of values belonging to the last announced labels.
        Unknown tabs and unknown labels are logged and skipped.
        """
        tab_id = div.getAttribute('id')
        section = self.tables.get(tab_id)
        if section is None:
            self.logger.warning('Unknown profile tab "%s"' % tab_id)
            return

        d = self.table[section]
        fields = self.fields[section]
        inner_tables = div.getElementsByTagName('table')
        if len(inner_tables) < 2:
            self.logger.warning('No data table found in tab "%s"' % tab_id)
            return
        table = inner_tables[1]

        field1 = None
        field2 = None

        for tr in table.getElementsByTagName('tr'):
            tds = tr.getElementsByTagName('td')
            if len(tds) != 2:
                continue

            label1 = ''
            label2 = ''
            value1 = ''
            value2 = ''
            # Check for first column
            if len(tds[0].childNodes) > 0:
                b = len(tds[0].childNodes) > 2 and tds[0].childNodes[2]
                if b and hasattr(b, 'tagName') and b.tagName == 'b':
                    for child in b.childNodes:
                        label1 += child.data
                else:
                    value1 = self._text_of(tds[0].childNodes).strip()

            # Check for second column
            if len(tds[1].childNodes) > 0:
                b = tds[1].childNodes[0]
                if b and hasattr(b, 'tagName') and b.tagName == 'b':
                    for child in b.firstChild.childNodes:
                        label2 += child.data
                else:
                    value2 = self._text_of(tds[1].childNodes)

            if label1 and value2:
                # This is a typically tuple of key/value on the line.
                try:
                    fields[label1].put_value(d, value2)
                except KeyError:
                    self.logger.warning('Unable to find "%s" (%s)' % (label1, repr(label1)))
            elif label1 and label2:
                # Two titles: the next lines hold one value per column for
                # each of them.
                field1 = self._lookup_field(fields, label1)
                field2 = self._lookup_field(fields, label2)
            elif not label1 and not label2:
                # two values, so it is a line of values
                if field1 and value1:
                    field1.put_value(d, value1)
                if field2 and value2:
                    field2.put_value(d, value2)

    def get_name(self):
        return self.name

    def get_description(self):
        return self.description

    def get_table(self):
        return self.table

    def get_id(self):
        return self.id

    def get_photos(self):
        return self.photos

    def get_status(self):
        return self.status

    def is_online(self):
        """Tell whether the status text contains ``'en ligne'``."""
        return 'en ligne' in self.status

    def get_stats(self):
        return self.stats

    def get_profile_text(self):
        """Return a unicode, human-readable dump of the whole profile."""
        parts = [u'Status: %s' % unicode(self.status)]
        if self.photos:
            parts.append(u'\nPhotos:')
            for photo in self.photos:
                parts.append(u'\n\t\t%s%s' % (unicode(photo['url']), (' (hidden)' if photo['hidden'] else '')))
        parts.append(u'\nStats:')
        for label, value in self.get_stats().iteritems():
            parts.append(u'\n\t\t%-15s %s' % (label + ':', value))
        parts.append(u'\n\nInformations:')
        for section, d in self.get_table().iteritems():
            parts.append(u'\n\t%s\n' % section)
            for key, value in d.iteritems():
                key = '%s:' % key
                if isinstance(value, list):
                    parts.append(u'\t\t%-15s %s\n' % (key, u', '.join(unicode(s) for s in value)))
                elif isinstance(value, float):
                    parts.append(u'\t\t%-15s %.2f\n' % (key, value))
                else:
                    parts.append(u'\t\t%-15s %s\n' % (key, unicode(value)))
        parts.append(u'\n\nDescription:\n%s' % unicode(self.get_description()))

        return u''.join(parts)
|