Support author information
This commit is contained in:
parent
f4f294502a
commit
08a7b36408
3 changed files with 73 additions and 10 deletions
|
|
@ -50,6 +50,8 @@ class HDSBackend(BaseBackend, ICapMessages):
|
||||||
thread.nb_messages = 1
|
thread.nb_messages = 1
|
||||||
yield thread
|
yield thread
|
||||||
|
|
||||||
|
GENDERS = ['<unknown>', 'boy', 'girl', 'transexual']
|
||||||
|
|
||||||
def get_thread(self, id):
|
def get_thread(self, id):
|
||||||
if isinstance(id, Thread):
|
if isinstance(id, Thread):
|
||||||
thread = id
|
thread = id
|
||||||
|
|
@ -75,12 +77,13 @@ class HDSBackend(BaseBackend, ICapMessages):
|
||||||
thread.root = Message(thread=thread,
|
thread.root = Message(thread=thread,
|
||||||
id=0,
|
id=0,
|
||||||
title=story.title,
|
title=story.title,
|
||||||
sender=story.author or u'',
|
sender=story.author.name,
|
||||||
receivers=None,
|
receivers=None,
|
||||||
date=thread.date,
|
date=thread.date,
|
||||||
parent=None,
|
parent=None,
|
||||||
content=story.body,
|
content=story.body,
|
||||||
children=[],
|
children=[],
|
||||||
|
signature='Written by a %s (%s)' % (self.GENDERS[story.author.sex], story.author.email),
|
||||||
flags=flags)
|
flags=flags)
|
||||||
|
|
||||||
return thread
|
return thread
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@
|
||||||
|
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
|
||||||
from .pages import ValidationPage, HomePage, HistoryPage, StoryPage
|
from .pages import ValidationPage, HomePage, HistoryPage, StoryPage, AuthorPage
|
||||||
|
|
||||||
# Browser
|
# Browser
|
||||||
class HDSBrowser(BaseBrowser):
|
class HDSBrowser(BaseBrowser):
|
||||||
|
|
@ -30,6 +30,7 @@ class HDSBrowser(BaseBrowser):
|
||||||
'http://histoires-de-sexe.net/menu.php': HomePage,
|
'http://histoires-de-sexe.net/menu.php': HomePage,
|
||||||
'http://histoires-de-sexe.net/sexe/histoires-par-date.php.*': HistoryPage,
|
'http://histoires-de-sexe.net/sexe/histoires-par-date.php.*': HistoryPage,
|
||||||
'http://histoires-de-sexe.net/sexe.php\?histoire=(?P<id>.+)': StoryPage,
|
'http://histoires-de-sexe.net/sexe.php\?histoire=(?P<id>.+)': StoryPage,
|
||||||
|
'http://histoires-de-sexe.net/fiche.php\?auteur=(?P<name>.+)': AuthorPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_stories(self):
|
def iter_stories(self):
|
||||||
|
|
@ -52,3 +53,9 @@ class HDSBrowser(BaseBrowser):
|
||||||
self.location('/sexe.php?histoire=%d' % id)
|
self.location('/sexe.php?histoire=%d' % id)
|
||||||
assert self.is_on_page(StoryPage)
|
assert self.is_on_page(StoryPage)
|
||||||
return self.page.get_story()
|
return self.page.get_story()
|
||||||
|
|
||||||
|
def get_author(self, name):
|
||||||
|
self.location(self.buildurl('/fiche.php', auteur=name))
|
||||||
|
|
||||||
|
assert self.is_on_page(AuthorPage)
|
||||||
|
return self.page.get_author()
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,18 @@ class ValidationPage(BasePage):
|
||||||
class HomePage(BasePage):
|
class HomePage(BasePage):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class Author(object):
|
||||||
|
(UNKNOWN,
|
||||||
|
MALE,
|
||||||
|
FEMALE,
|
||||||
|
TRANSEXUAL) = xrange(4)
|
||||||
|
|
||||||
|
def __init__(self, name):
|
||||||
|
self.name = name
|
||||||
|
self.sex = self.UNKNOWN
|
||||||
|
self.email = None
|
||||||
|
self.description = None
|
||||||
|
|
||||||
class Story(object):
|
class Story(object):
|
||||||
def __init__(self, id):
|
def __init__(self, id):
|
||||||
self.id = id
|
self.id = id
|
||||||
|
|
@ -54,16 +66,16 @@ class HistoryPage(BasePage):
|
||||||
story = Story(int(m.group(1)))
|
story = Story(int(m.group(1)))
|
||||||
story.title = link.text.strip()
|
story.title = link.text.strip()
|
||||||
else:
|
else:
|
||||||
story.author = link.text.strip()
|
story.author = Author(link.text.strip())
|
||||||
date_text = link.tail.strip().split('\n')[-1].strip()
|
date_text = link.tail.strip().split('\n')[-1].strip()
|
||||||
m = re.match('(\d+)-(\d+)-(\d+)', date_text)
|
m = re.match('(\d+)-(\d+)-(\d+)', date_text)
|
||||||
if not m:
|
if not m:
|
||||||
self.logger.warning('Unable to parse datetime "%s"' % date_text)
|
self.logger.warning('Unable to parse datetime "%s"' % date_text)
|
||||||
story = None
|
story = None
|
||||||
continue
|
continue
|
||||||
story.date = datetime.datetime(int(m.group(3)),
|
story.date = datetime.date(int(m.group(3)),
|
||||||
int(m.group(2)),
|
int(m.group(2)),
|
||||||
int(m.group(1)))
|
int(m.group(1)))
|
||||||
yield story
|
yield story
|
||||||
story = None
|
story = None
|
||||||
|
|
||||||
|
|
@ -71,14 +83,33 @@ class StoryPage(BasePage):
|
||||||
def get_story(self):
|
def get_story(self):
|
||||||
story = Story((self.group_dict['id']))
|
story = Story((self.group_dict['id']))
|
||||||
story.body = u''
|
story.body = u''
|
||||||
story.author = self.parser.select(self.document.getroot(), 'a.t3', 1).text.strip()
|
meta = self.parser.select(self.document.getroot(), 'td.t0', 1)
|
||||||
|
story.author = Author(meta.xpath('./a[@class="t3"]')[0].text.strip())
|
||||||
|
gender = meta.xpath('./a[@class="t0"]')[0].text
|
||||||
|
if 'homme' in gender:
|
||||||
|
story.author.sex = story.author.MALE
|
||||||
|
elif 'femme' in gender:
|
||||||
|
story.author.sex = story.author.FEMALE
|
||||||
|
else:
|
||||||
|
story.author.sex = story.author.TRANSEXUAL
|
||||||
|
email_tag = meta.xpath('./span[@class="police1"]')[0]
|
||||||
|
story.author.email = email_tag.text.strip()
|
||||||
|
for img in email_tag.findall('img'):
|
||||||
|
if img.attrib['src'].endswith('meyle1.gif'):
|
||||||
|
story.author.email += '@'
|
||||||
|
elif img.attrib['src'].endswith('meyle1pouan.gif'):
|
||||||
|
story.author.email += '.'
|
||||||
|
else:
|
||||||
|
self.logger.warning('Unable to know what image is %s' % img.attrib['src'])
|
||||||
|
story.author.email += img.tail.strip()
|
||||||
|
|
||||||
story.title = self.parser.select(self.document.getroot(), 'h1', 1).text.strip()
|
story.title = self.parser.select(self.document.getroot(), 'h1', 1).text.strip()
|
||||||
date_text = self.parser.select(self.document.getroot(), 'span.t4', 1).text.strip().split('\n')[-1].strip()
|
date_text = self.parser.select(self.document.getroot(), 'span.t4', 1).text.strip().split('\n')[-1].strip()
|
||||||
m = re.match('(\d+)-(\d+)-(\d+)', date_text)
|
m = re.match('(\d+)-(\d+)-(\d+)', date_text)
|
||||||
if m:
|
if m:
|
||||||
story.date = datetime.datetime(int(m.group(3)),
|
story.date = datetime.date(int(m.group(3)),
|
||||||
int(m.group(2)),
|
int(m.group(2)),
|
||||||
int(m.group(1)))
|
int(m.group(1)))
|
||||||
else:
|
else:
|
||||||
self.logger.warning('Unable to parse datetime "%s"' % date_text)
|
self.logger.warning('Unable to parse datetime "%s"' % date_text)
|
||||||
|
|
||||||
|
|
@ -91,3 +122,25 @@ class StoryPage(BasePage):
|
||||||
story.body += para.tail.strip()
|
story.body += para.tail.strip()
|
||||||
story.body = story.body.replace(u'\x92', "'").strip()
|
story.body = story.body.replace(u'\x92', "'").strip()
|
||||||
return story
|
return story
|
||||||
|
|
||||||
|
|
||||||
|
class AuthorPage(BasePage):
|
||||||
|
def get_author(self):
|
||||||
|
meta = self.parser.select(self.document.getroot(), 'td.t0', 1)
|
||||||
|
author = Author(meta.xpath('./span[@class="t3"]')[0].text.strip())
|
||||||
|
if 'homme' in meta.xpath('./a[@class="t0"]')[0].text:
|
||||||
|
author.sex = author.MALE
|
||||||
|
else:
|
||||||
|
author.sex = author.FEMALE
|
||||||
|
|
||||||
|
author.description = u''
|
||||||
|
for para in meta.getchildren():
|
||||||
|
if para.tag not in ('b', 'br'):
|
||||||
|
continue
|
||||||
|
if para.text is not None:
|
||||||
|
author.description += '\n\n%s' % para.text.strip()
|
||||||
|
if para.tail is not None:
|
||||||
|
author.description += '\n%s' % para.tail.strip()
|
||||||
|
author.description = author.description.replace(u'\x92', "'").strip()
|
||||||
|
return author
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue