[filter] improve Join filter

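The filter now behaves a little more like the join command: the pattern is used as a separator between the items instead of as a format string applied to each item.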
I also added some parameters that help with formatting (newline, addBefore, addAfter).
This commit is contained in:
Bezleputh 2015-03-06 18:02:23 +01:00
commit 11a63c33ce
5 changed files with 22 additions and 10 deletions

View file

@ -61,7 +61,7 @@ class SearchPage(HTMLPage):
klass = BaseJobAdvert
obj_url = Format('%s#%s', Env('url'), Env('id'))
obj_description = Join('%s\r\n',
obj_description = Join('\r\n',
'div/fieldset/*[(@class="titreParagraphe" or @class="normal")]',
textCleaner=CleanHTML)
obj_title = CleanText('div/span[@class="intituleposte"]')

View file

@ -73,7 +73,7 @@ class AdvertPage(HTMLPage):
obj_url = BrowserURL('advert', _id=Env('_id'))
obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]|//div[@itemprop="title"]/h1')
obj_description = CleanHTML('//div[@id="jobBodyContent"]|//div[@itemprop="description"]')
obj_contract_type = Join('%s ', '//dd[starts-with(@class, "multipledd")]')
obj_contract_type = Join(' ', '//dd[starts-with(@class, "multipledd")]')
obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]')
obj_place = CleanText('//span[@itemprop="jobLocation"]')
obj_pay = CleanText('//span[@itemprop="baseSalary"]')

View file

@ -64,7 +64,7 @@ class AdvertPage(HTMLPage):
class get_job_advert(ItemElement):
klass = BaseJobAdvert
obj_description = Join('\n%s', '//div[@id="annonce-detail"]/p[@class="text"]', textCleaner=CleanHTML)
obj_description = Join('\n', '//div[@id="annonce-detail"]/p[@class="text"]', textCleaner=CleanHTML)
obj_id = Env('_id')
obj_url = BrowserURL('advert_page', _id=Env('_id'))
obj_publication_date = Date(Regexp(CleanText('//div[@id="annonce-detail"]/p[@class="infos"]'),

View file

@ -125,8 +125,8 @@ class Description(Filter):
return Format(u'%s %s\n\n%s%s\n\n',
CleanText("%s/div[@class='d-rubric-inner']/h1" % header),
CleanText("%s/div[@class='d-rubric-inner']/small" % header),
Join(u'- %s\n', "%s/ul[@class='pvi-product-specs']/li" % header),
Join(u'- %s\n', "%s/ul/li" % section))(el[0])
Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True),
Join(u'- ', "%s/ul/li" % section, newline=True, addBefore=' - '))(el[0])
class EventPage(HTMLPage):

View file

@ -690,19 +690,31 @@ class BrowserURL(MultiFilter):
class Join(Filter):
def __init__(self, pattern, selector=None, textCleaner=CleanText):
def __init__(self, pattern, selector=None, textCleaner=CleanText, newline=False, addBefore='', addAfter=''):
super(Join, self).__init__(selector)
self.pattern = pattern
self.textCleaner = textCleaner
self.newline = newline
self.addBefore = addBefore
self.addAfter = addAfter
@debug()
def filter(self, el):
res = u''
for li in el:
res += self.pattern % self.textCleaner.clean(li)
items = [self.textCleaner.clean(e) for e in el]
items = [item for item in items if item]
return res
if self.newline:
items = ['%s\r\n' % item for item in items]
result = self.pattern.join(items)
if self.addBefore:
result = '%s%s' % (self.addBefore, result)
if self.addAfter:
result = '%s%s' % (result, self.addAfter)
return result
class Eval(MultiFilter):
"""