fix parsing of profiles

This commit is contained in:
Romain Bignon 2015-01-23 16:35:01 +01:00
commit cd922779a3

View file

@@ -135,15 +135,14 @@ class ProfilePage(Page):
profile['id'] = unicode(title.text[len('OkCupid: '):]) profile['id'] = unicode(title.text[len('OkCupid: '):])
profile['data'] = OrderedDict() profile['data'] = OrderedDict()
profile_p = self.parser.select(self.document.getroot(), "//div[@id='page_content']//p", method='xpath') profile_p = self.parser.select(self.document.getroot(), "//div[@id='page_content']//div[contains(@class, 'basics')]//p", method='xpath')
profile['data']['infos'] = ProfileNode('infos', u'Informations', OrderedDict(), flags=ProfileNode.SECTION) profile['data']['infos'] = ProfileNode('infos', u'Informations', OrderedDict(), flags=ProfileNode.SECTION)
info = { info = {
'age' : unicode(profile_p[1].text.split(' / ')[0]), 'age' : profile_p[1].text.split(u'', 1)[0].strip(),
'sex' : unicode(profile_p[1].text.split(' / ')[1]), 'location' : profile_p[1].text.split(u'', 1)[1].strip(),
'orientation' : unicode(profile_p[1].text.split(' / ')[2]), 'sex' : profile_p[2].text.strip(),
'relationship' : unicode(profile_p[1].text.split(' / ')[3]),
} }
for key, val in info.iteritems(): for key, val in info.iteritems():