weboob-devel/weboob/backends/aum/pages/profile.py
2011-04-08 12:48:07 +02:00

445 lines
18 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2008-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.backends.aum.pages.base import PageBase
from weboob.tools.ordereddict import OrderedDict
from copy import deepcopy
from logging import warning
import re
class FieldBase(object):
    """Base class of the parsers used for the fields of a profile table.

    A field remembers the dictionary key its value is stored under;
    subclasses implement put_value() to parse the raw text.
    """

    def __init__(self, key):
        """Remember `key`, the dictionary key this field writes to."""
        self.key = key

    def put_value(self, d, value):
        """Parse `value` and store the result in the dictionary `d`.

        Always raises NotImplementedError here: subclasses must override it.
        """
        raise NotImplementedError()
class FieldString(FieldBase):
    """Field whose value is kept as a single unicode string."""

    def put_value(self, d, value):
        """Store `value`, converted with unicode(), under d[self.key]."""
        text = unicode(value)
        d[self.key] = text
class FieldList(FieldBase):
    """Field whose raw text is a ', '-separated enumeration."""

    def put_value(self, d, value):
        """Split `value` on ', ' and store the resulting list under d[self.key]."""
        items = value.split(', ')
        d[self.key] = items
class FieldWideList(FieldBase):
    """Field whose values arrive one at a time and are accumulated."""

    def put_value(self, d, value):
        """Add `value` as one more element of the sequence at d[self.key]."""
        collected = d[self.key]
        # In-place extension followed by re-assignment, i.e. exactly what an
        # augmented assignment on the subscript does.
        collected += [value]
        d[self.key] = collected
class FieldOld(FieldBase):
    """Field parsing the age and, when present, the birthday.

    The expected text looks like "<age> ans" optionally followed by
    " (Née le <day> <month name> <year>)".
    """

    # group 1: age; group 2: the whole optional birthday part;
    # groups 3, 4 and 5: day, month name and year.
    regexp = re.compile(u'([0-9]+) ans( \(Née le ([0-9]+) ([^ ]+) ([0-9]+)\))?')
    # Month names placed so that the list index is the month number (the empty
    # first entry keeps the numbering starting at 1).
    month2i = ['', 'janvier', u'février', 'mars', 'avril', 'mai', 'juin', 'juillet', u'août', 'septembre', 'octobre', 'novembre', u'décembre']

    def put_value(self, d, value):
        """Store the age under d[self.key] and the birthday under d['birthday'].

        Nothing is stored when `value` does not match the expected format.
        The birthday is a (day, month number, year) tuple and is only set
        when the optional birthday part is present and its month is known.
        """
        m = self.regexp.match(value)
        if not m:
            return

        d[self.key] = int(m.group(1))
        if not m.group(2):
            return

        try:
            d['birthday'] = (int(m.group(3)),
                             self.month2i.index(m.group(4)),
                             int(m.group(5)))
        except ValueError:
            # list.index() raises ValueError for a month name missing from
            # month2i. Report it through logging, like the other fields do,
            # instead of printing on stdout.
            warning('Unable to parse the birthday in "%s"' % value)
class FieldLocation(FieldBase):
    """Field parsing "<location> (<zipcode>), <country>" texts.

    The " (<zipcode>)" part is optional and the zipcode is five digits.
    """

    location = re.compile('(?P<location>.+?)( \((?P<zipcode>[0-9]{5})\))?, (?P<country>.*)')

    def __init__(self):
        # This field writes several fixed keys, so its own key is left empty.
        FieldBase.__init__(self, '')

    def put_value(self, d, value):
        """Fill d['country'], d['location'] and d['zipcode'] from `value`.

        When `value` cannot be parsed, a warning is logged and the whole text
        is stored under d['location'].
        """
        # TODO: determine distance, or something like
        found = self.location.match(value)
        if not found:
            warning('Unable to parse the location "%s"' % value)
            d['location'] = unicode(value)
            return

        groups = found.groupdict()
        for name in ('country', 'location', 'zipcode'):
            # A group which did not take part in the match yields None.
            d[name] = groups.get(name)
class FieldMeasurements(FieldBase):
    """Field parsing the height and weight, and deriving the BMI from them."""

    height = re.compile('([0-9]{1,3}) cm')
    weight = re.compile('([0-9]{1,3}) kg')
    # TODO: parse third parameter

    def __init__(self):
        # This field writes several fixed keys, so its own key is left empty.
        FieldBase.__init__(self, '')

    def put_value(self, d, value):
        """Fill d['height'] and d['weight'], then d['fat'] and d['BMI'].

        `value` is a ', '-separated text; parts looking like "<n> cm" and
        "<n> kg" give the height and the weight. The BMI and its label are
        only computed when both d['height'] and d['weight'] are non-zero.
        """
        patterns = (('height', self.height), ('weight', self.weight))
        for part in value.split(', '):
            for key, pattern in patterns:
                found = pattern.match(part)
                if found:
                    d[key] = int(found.group(1))
                    break

        if not d['height'] or not d['weight']:
            return

        # Weight in kg divided by the square of the height in metres.
        metres = d['height'] / 100.0
        bmi = d['weight'] / float(metres ** 2)

        # Upper bounds (exclusive) of each category, in increasing order.
        categories = ((15.5, 'severely underweight'),
                      (18.4, 'underweight'),
                      (24.9, 'normal'),
                      (30, 'overweight'))
        for limit, label in categories:
            if bmi < limit:
                d['fat'] = label
                break
        else:
            d['fat'] = 'obese'
        d['BMI'] = bmi
class FieldParticularSignes(FieldBase):
    """Field turning the list of distinguishing marks into boolean flags."""

    def __init__(self):
        # This field writes several fixed keys, so its own key is left empty.
        FieldBase.__init__(self, '')

    def put_value(self, d, value):
        """Set to True the flag of every mark mentioned in `value`.

        `value` is a ', '-separated text; each part sets at most one flag,
        the first one (in the order below) whose word it contains.
        """
        # (word searched in the text, key set in the dictionary)
        markers = (('tatouages', 'tatoos'),
                   ('piercing', 'piercing'),
                   ('lunettes', 'glasses'),
                   ('rousseur', 'freckle'))
        for part in value.split(', '):
            for needle, flag in markers:
                if needle in part:
                    d[flag] = True
                    break
class ProfilePage(PageBase):
    """Parser of a profile page.

    on_loaded() reads the DOM held in self.document and fills the name,
    status, id, photos, statistics, description and the per-section tables
    of the profile.

    NOTE(review): self.document and self.logger are not defined here; they
    are presumably provided by PageBase -- confirm.
    """

    # Template of self.table: one ordered dictionary of default values per
    # section. It is deep-copied for each page so that the mutable defaults
    # (the lists) are never shared.
    empty_table = OrderedDict((
        ('details', OrderedDict((
            ('old', 0),
            ('birthday', (0,0,0)),
            ('zipcode', 0),
            ('location', ''),
            ('country', ''),
            ('eyes', ''),
            ('hairs', []),
            ('height', 0),
            ('weight', 0),
            ('BMI', 0),
            ('fat', ''),
            ('from', ''),
            ('tatoos', False),
            ('piercing', False),
            ('freckle', False),
            ('glasses', False),
            ('job', ''),
            ('style', ''),
            ('alimentation', ''),
            ('alcool', ''),
            ('tabac', ''),
        ))),
        ('liking', OrderedDict((
            ('activities', ''),
            ('music', []),
            ('cinema', []),
            ('books', []),
            ('tv', []),
        ))),
        ('sex', OrderedDict((
            ('underwear', []),
            ('top', []),
            ('bottom', []),
            ('interval', ''),
            ('favorite', []),
            ('practices', []),
            ('toys', []),
        ))),
        ('personality', OrderedDict((
            ('snap', ''),
            ('exciting', ''),
            ('hate', ''),
            ('vices', ''),
            ('assets', ''),
            ('fantasies', ''),
            ('is', []),
        ))),
    ))

    # id attribute of a tab <div> -> name of the section it describes.
    tables = {'tab_0': 'details',
              'tab_1': 'liking',
              'tab_2': 'sex',
              'tab_3': 'personality'
             }

    # section name -> {label as written on the page: field parser}.
    # The labels are compared verbatim, trailing spaces included.
    fields = {'details': {'Age': FieldOld('old'),
                          u'Réside à': FieldLocation(),
                          'Yeux': FieldString('eyes'),
                          'Cheveux ': FieldList('hairs'),
                          'Mensurations ': FieldMeasurements(),
                          'Origines ': FieldString('from'),
                          'Signes particuliers ': FieldParticularSignes(),
                          'Style ': FieldString('style'),
                          'Profession ': FieldString('job'),
                          'Alimentation': FieldString('alimentation'),
                          'Alcool': FieldString('alcool'),
                          'Tabac': FieldString('tabac'),
                         },
              'liking': {'Hobbies ': FieldString('activities'),
                         'Musique ': FieldWideList('music'),
                         u'Cinéma': FieldWideList('cinema'),
                         'Livres ': FieldWideList('books'),
                         u'Télé': FieldWideList('tv'),
                        },
              'sex': {u'Sous-v\xeatements ': FieldList('underwear'),
                      '... en haut ': FieldList('top'),
                      '... en bas ': FieldList('bottom'),
                      u'Fréquence idéale des rapports sexuels ':
                          FieldString('interval'),
                      u'Position favorite ': FieldList('favorite'),
                      'Pratiques sexuelles ': FieldList('practices'),
                      u'Accessoires préférés ':FieldList('toys'),
                      u'Ce qui se cache en dessous ':
                          FieldList('underwear'),
                      u"Ce qui m'\xe9moustille ":
                          FieldList('favorite'),
                      u"Au lit j'aime ": FieldList('practices'),
                      u'Mes accessoires ': FieldList('toys'),
                     },
              'personality': {u'Ça la fait craquer ': FieldString('snap'),
                              u'Ça l\'excite ': FieldString('exciting'),
                              u'Elle déteste ': FieldString('hate'),
                              'Ses vices ': FieldString('vices'),
                              'Ses atouts ': FieldString('assets'),
                              'Ses fantasmes ': FieldString('fantasies'),
                              'Elle est ': FieldList('is'),
                             },
             }

    # onclick handler whose first argument (group 2) is the profile id.
    ID_REGEXP = re.compile('(charm|addBasket|openAlbum)\(([0-9]+)(,[\s\'\d]+)?\)')
    # URL of a picture hosted on the site.
    PHOTO_REGEXP = re.compile('http://(s|p)([0-9]+)\.adopteunmec\.com/(.*)')
    # label of a statistic on the page -> key in self.stats.
    STATS2ID = {'visites': 'visits',
                'charmes': 'charms',
                'paniers': 'baskets',
                'mails': 'mails',
                'POPULARIT': 'score',
               }
    # Leading digits (possibly separated by whitespace) of a statistic value.
    STATS_VALUE_REGEXP = re.compile('([0-9\s]+).*')

    def __repr__(self):
        if isinstance(self.name, unicode):
            name = self.name.encode('utf-8', 'backslashreplace')
        else:
            name = self.name
        return '<Profile name="%s">' % name

    def on_loaded(self):
        """Parse self.document and fill every attribute of the profile."""
        self.name = u''
        self.description = u''
        self.table = deepcopy(self.empty_table)
        self.id = 0
        self.photos = []
        self.status = ''
        self.stats = OrderedDict((
            ('score', 0),
            ('visits', 0),
            ('charms', 0),
            ('baskets', 0),
            ('mails', 0),
        ))

        for td in self.document.getElementsByTagName('td'):
            self._parse_cell(td)

        for div in self.document.getElementsByTagName('div'):
            if not div.hasAttribute('id'):
                continue
            div_id = div.getAttribute('id')
            if div_id == 'about_div':
                self.parse_description(div)
            if div_id.startswith('tab_'):
                self.parse_table(div)

        # The profile id is taken from the first onclick handler matching
        # ID_REGEXP, looking at the <img> elements before the <td> ones.
        for tag in ('img', 'td'):
            for node in self.document.getElementsByTagName(tag):
                if not node.hasAttribute('onclick'):
                    continue
                m = self.ID_REGEXP.match(node.getAttribute('onclick'))
                if m:
                    self.id = int(m.group(2))
                    break
            if self.id:
                break

        self._add_hidden_photos()

    def _parse_cell(self, td):
        """Extract the name, status, a photo or the statistics from one <td>."""
        if td.hasAttribute('style') and hasattr(td.firstChild, 'data'):
            # The name and status cells are only recognizable by their style.
            style = td.getAttribute('style')
            if style == "color:#ffffff;font-size:32px;font-weight:bold;letter-spacing:-2px":
                self.name = td.firstChild.data
            if style == "font-size:12px;font-weight:bold":
                self.status = td.firstChild.data

        if td.hasAttribute('background'):
            background = td.getAttribute('background')
            if self.PHOTO_REGEXP.match(background):
                # The cell shows a thumbnail; rewrite its URL into the one of
                # the corresponding "image" file.
                self.photos.append(dict(url=re.sub(u'thumb[0-2]_', u'image', background),
                                        hidden=False))

        # Compare the attribute directly: wrapping it in str() could raise on
        # a non-ASCII value.
        if td.hasAttribute('width') and td.getAttribute('width') == '226':
            self._parse_stats(td)

    def _parse_stats(self, td):
        """Read the statistics from the rows found inside the cell `td`."""
        for tr in td.getElementsByTagName('tr'):
            tds = tr.getElementsByTagName('td')
            if len(tds) > 2 and hasattr(tds[2].firstChild, 'data'):
                label = tds[0].firstChild.data
                value = tds[2].firstChild.data
            elif len(tds) == 2:
                label = unicode(tds[0].childNodes[1].data)
                value = tds[1].childNodes[1].data
            else:
                continue

            m = self.STATS_VALUE_REGEXP.match(value)
            if m and label in self.STATS2ID:
                # Drop every whitespace used as separator; the group may
                # contain nothing else, in which case there is no number.
                digits = ''.join(m.group(1).split())
                if digits:
                    self.stats[self.STATS2ID[label]] = int(digits)

    def _add_hidden_photos(self):
        """Append to self.photos the numbered photos not shown on the page.

        Every number between 1 and the highest number among the known photos
        which has no known URL is added as a photo flagged hidden=True.
        Known photos whose URL does not end with "image<number>.jpg" are
        ignored here.
        """
        if len(self.photos) == 0:
            return

        # find hidden photos.
        photo_regex = re.compile('(?P<base_url>http://.+\.adopteunmec\.com/.+/)image(?P<id>.+)\.jpg')
        base_url = None
        photo_max_id = 0
        for photo in self.photos:
            m = photo_regex.match(photo['url'])
            if not m:
                continue
            try:
                photo_id = int(m.group('id'))
            except ValueError:
                continue
            if base_url is None:
                base_url = m.group('base_url')
            photo_max_id = max(photo_max_id, photo_id)

        if base_url is None:
            return

        known_urls = set(photo['url'] for photo in self.photos)
        for photo_id in xrange(1, photo_max_id + 1):
            url = '%simage%s.jpg' % (base_url, photo_id)
            if url not in known_urls:
                self.photos.append(dict(url=url, hidden=True))
                known_urls.add(url)

    def parse_description(self, div):
        """Build self.description from the children of `div`.

        Newlines inside the text are dropped, <br> elements become newlines
        and the text of <i> elements is kept.
        """
        # look for description
        parts = []
        for c in div.childNodes:
            if hasattr(c, 'data'):
                parts.append(c.data.replace('\n', ''))  # to strip \n
            elif hasattr(c, 'tagName') and c.tagName == 'br':
                parts.append('\n')
            elif hasattr(c, 'tagName') and c.tagName == 'i':
                # An empty <i> has no child to read the text from.
                if c.childNodes and hasattr(c.childNodes[0], 'data'):
                    parts.append(c.childNodes[0].data.replace('\n', ''))
        self.description = ''.join(parts).strip()

    def _get_field(self, fields, label):
        """Return the parser registered for `label`, or None (with a warning)."""
        try:
            return fields[label]
        except KeyError:
            self.logger.warning('Unable to find "%s" (%s)' % (label, repr(label)))
            return None

    def parse_table(self, div):
        """Fill one section of self.table from the tab `div`.

        The section is chosen from the id attribute of `div`; a tab whose id
        is not listed in self.tables is skipped with a warning. The rows are
        read from the second <table> found in `div`.
        """
        tab_id = div.getAttribute('id')
        try:
            section = self.tables[tab_id]
        except KeyError:
            self.logger.warning('Unknown profile tab "%s"' % tab_id)
            return

        d = self.table[section]
        fields = self.fields[section]
        table = div.getElementsByTagName('table')[1]

        # Parsers of the two columns, set by a row of two titles and used by
        # the rows of values which follow it.
        field1 = None
        field2 = None

        for tr in table.getElementsByTagName('tr'):
            tds = tr.getElementsByTagName('td')
            if len(tds) != 2:
                continue

            label1 = ''
            label2 = ''
            value1 = ''
            value2 = ''

            # Check for first column
            children = tds[0].childNodes
            if len(children) > 0:
                b = children[2] if len(children) > 2 else None
                if b and hasattr(b, 'tagName') and b.tagName == 'b':
                    for child in b.childNodes:
                        label1 += child.data
                else:
                    for child in children:
                        if hasattr(child, 'data') and child.data != u'\xa0': # strip nbsp
                            value1 += child.data
                    value1 = value1.strip()

            # Check for second column
            children = tds[1].childNodes
            if len(children) > 0:
                b = children[0]
                if b and hasattr(b, 'tagName') and b.tagName == 'b':
                    for child in b.firstChild.childNodes:
                        label2 += child.data
                else:
                    for child in children:
                        if hasattr(child, 'data') and child.data != u'\xa0': # strip nbsp
                            value2 += child.data

            if label1 and value2:
                # This is a typically tuple of key/value on the line.
                field = self._get_field(fields, label1)
                if field is not None:
                    field.put_value(d, value2)
            elif label1 and label2:
                # two titles, so there will have a list of value in
                # next lines on each columns
                field1 = self._get_field(fields, label1)
                field2 = self._get_field(fields, label2)
            elif not label1:
                # No title in the first column: it is a line of values.
                # NOTE(review): the second column is deliberately not
                # tested, as before -- confirm whether a title there should
                # be handled differently.
                if field1 and value1:
                    field1.put_value(d, value1)
                if field2 and value2:
                    field2.put_value(d, value2)

    def get_name(self):
        return self.name

    def get_description(self):
        return self.description

    def get_table(self):
        return self.table

    def get_id(self):
        return self.id

    def get_photos(self):
        return self.photos

    def get_status(self):
        return self.status

    def is_online(self):
        """Tell whether the status text contains 'en ligne'."""
        return 'en ligne' in self.status

    def get_stats(self):
        return self.stats

    def get_profile_text(self):
        """Return the whole profile formatted as one unicode text."""
        body = u'Status: %s' % unicode(self.status)
        if self.photos:
            body += u'\nPhotos:'
            for photo in self.photos:
                body += u'\n\t\t%s%s' % (unicode(photo['url']), (' (hidden)' if photo['hidden'] else ''))
        body += u'\nStats:'
        for label, value in self.get_stats().iteritems():
            body += u'\n\t\t%-15s %s' % (label + ':', value)
        body += u'\n\nInformations:'
        for section, d in self.get_table().iteritems():
            body += u'\n\t%s\n' % section
            for key, value in d.items():
                key = '%s:' % key
                if isinstance(value, list):
                    body += u'\t\t%-15s %s\n' % (key, u', '.join(unicode(s) for s in value))
                elif isinstance(value, float):
                    body += u'\t\t%-15s %.2f\n' % (key, value)
                else:
                    body += u'\t\t%-15s %s\n' % (key, unicode(value))
        body += u'\n\nDescription:\n%s' % unicode(self.get_description())
        return body