[filter] improve Join filter

The Join filter now behaves more like Python's `str.join`: its first argument is a plain separator string rather than a `%s` format pattern.
I also added some parameters to help with formatting (newline, addBefore, addAfter).
This commit is contained in:
Bezleputh 2015-03-06 18:02:23 +01:00
commit 11a63c33ce
5 changed files with 22 additions and 10 deletions

View file

@ -61,7 +61,7 @@ class SearchPage(HTMLPage):
klass = BaseJobAdvert
obj_url = Format('%s#%s', Env('url'), Env('id'))
obj_description = Join('%s\r\n',
obj_description = Join('\r\n',
'div/fieldset/*[(@class="titreParagraphe" or @class="normal")]',
textCleaner=CleanHTML)
obj_title = CleanText('div/span[@class="intituleposte"]')

View file

@ -73,7 +73,7 @@ class AdvertPage(HTMLPage):
obj_url = BrowserURL('advert', _id=Env('_id'))
obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]|//div[@itemprop="title"]/h1')
obj_description = CleanHTML('//div[@id="jobBodyContent"]|//div[@itemprop="description"]')
obj_contract_type = Join('%s ', '//dd[starts-with(@class, "multipledd")]')
obj_contract_type = Join(' ', '//dd[starts-with(@class, "multipledd")]')
obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]')
obj_place = CleanText('//span[@itemprop="jobLocation"]')
obj_pay = CleanText('//span[@itemprop="baseSalary"]')

View file

@ -64,7 +64,7 @@ class AdvertPage(HTMLPage):
class get_job_advert(ItemElement):
klass = BaseJobAdvert
obj_description = Join('\n%s', '//div[@id="annonce-detail"]/p[@class="text"]', textCleaner=CleanHTML)
obj_description = Join('\n', '//div[@id="annonce-detail"]/p[@class="text"]', textCleaner=CleanHTML)
obj_id = Env('_id')
obj_url = BrowserURL('advert_page', _id=Env('_id'))
obj_publication_date = Date(Regexp(CleanText('//div[@id="annonce-detail"]/p[@class="infos"]'),

View file

@ -125,8 +125,8 @@ class Description(Filter):
return Format(u'%s %s\n\n%s%s\n\n',
CleanText("%s/div[@class='d-rubric-inner']/h1" % header),
CleanText("%s/div[@class='d-rubric-inner']/small" % header),
Join(u'- %s\n', "%s/ul[@class='pvi-product-specs']/li" % header),
Join(u'- %s\n', "%s/ul/li" % section))(el[0])
Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True),
Join(u'- ', "%s/ul/li" % section, newline=True, addBefore=' - '))(el[0])
class EventPage(HTMLPage):