[filter] improve Join filter
The filter will now act a little bit more like the join command. I also added some parameters that will help with formatting (newline, addBefore, addAfter).
This commit is contained in:
parent
a3d351eb88
commit
11a63c33ce
5 changed files with 22 additions and 10 deletions
|
|
@ -61,7 +61,7 @@ class SearchPage(HTMLPage):
|
|||
klass = BaseJobAdvert
|
||||
|
||||
obj_url = Format('%s#%s', Env('url'), Env('id'))
|
||||
obj_description = Join('%s\r\n',
|
||||
obj_description = Join('\r\n',
|
||||
'div/fieldset/*[(@class="titreParagraphe" or @class="normal")]',
|
||||
textCleaner=CleanHTML)
|
||||
obj_title = CleanText('div/span[@class="intituleposte"]')
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ class AdvertPage(HTMLPage):
|
|||
obj_url = BrowserURL('advert', _id=Env('_id'))
|
||||
obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]|//div[@itemprop="title"]/h1')
|
||||
obj_description = CleanHTML('//div[@id="jobBodyContent"]|//div[@itemprop="description"]')
|
||||
obj_contract_type = Join('%s ', '//dd[starts-with(@class, "multipledd")]')
|
||||
obj_contract_type = Join(' ', '//dd[starts-with(@class, "multipledd")]')
|
||||
obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]')
|
||||
obj_place = CleanText('//span[@itemprop="jobLocation"]')
|
||||
obj_pay = CleanText('//span[@itemprop="baseSalary"]')
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ class AdvertPage(HTMLPage):
|
|||
class get_job_advert(ItemElement):
|
||||
klass = BaseJobAdvert
|
||||
|
||||
obj_description = Join('\n%s', '//div[@id="annonce-detail"]/p[@class="text"]', textCleaner=CleanHTML)
|
||||
obj_description = Join('\n', '//div[@id="annonce-detail"]/p[@class="text"]', textCleaner=CleanHTML)
|
||||
obj_id = Env('_id')
|
||||
obj_url = BrowserURL('advert_page', _id=Env('_id'))
|
||||
obj_publication_date = Date(Regexp(CleanText('//div[@id="annonce-detail"]/p[@class="infos"]'),
|
||||
|
|
|
|||
|
|
@ -125,8 +125,8 @@ class Description(Filter):
|
|||
return Format(u'%s %s\n\n%s%s\n\n',
|
||||
CleanText("%s/div[@class='d-rubric-inner']/h1" % header),
|
||||
CleanText("%s/div[@class='d-rubric-inner']/small" % header),
|
||||
Join(u'- %s\n', "%s/ul[@class='pvi-product-specs']/li" % header),
|
||||
Join(u'- %s\n', "%s/ul/li" % section))(el[0])
|
||||
Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True),
|
||||
Join(u'- ', "%s/ul/li" % section, newline=True, addBefore=' - '))(el[0])
|
||||
|
||||
|
||||
class EventPage(HTMLPage):
|
||||
|
|
|
|||
|
|
@ -690,19 +690,31 @@ class BrowserURL(MultiFilter):
|
|||
|
||||
|
||||
class Join(Filter):
|
||||
def __init__(self, pattern, selector=None, textCleaner=CleanText):
|
||||
def __init__(self, pattern, selector=None, textCleaner=CleanText, newline=False, addBefore='', addAfter=''):
|
||||
super(Join, self).__init__(selector)
|
||||
self.pattern = pattern
|
||||
self.textCleaner = textCleaner
|
||||
self.newline = newline
|
||||
self.addBefore = addBefore
|
||||
self.addAfter = addAfter
|
||||
|
||||
@debug()
|
||||
def filter(self, el):
|
||||
res = u''
|
||||
for li in el:
|
||||
res += self.pattern % self.textCleaner.clean(li)
|
||||
items = [self.textCleaner.clean(e) for e in el]
|
||||
items = [item for item in items if item]
|
||||
|
||||
return res
|
||||
if self.newline:
|
||||
items = ['%s\r\n' % item for item in items]
|
||||
|
||||
result = self.pattern.join(items)
|
||||
|
||||
if self.addBefore:
|
||||
result = '%s%s' % (self.addBefore, result)
|
||||
|
||||
if self.addAfter:
|
||||
result = '%s%s' % (result, self.addAfter)
|
||||
|
||||
return result
|
||||
|
||||
class Eval(MultiFilter):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue