From 0628802b723bb55c7a223394ee7551408245c59d Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Fri, 8 Mar 2013 13:36:39 +0100 Subject: [PATCH] [booblyrics] able to search by artist or songtitle --- modules/seeklyrics/backend.py | 4 +-- modules/seeklyrics/browser.py | 18 ++++++++----- modules/seeklyrics/pages.py | 28 ++++++++++++++++++-- modules/seeklyrics/test.py | 12 +++++++-- weboob/applications/booblyrics/booblyrics.py | 17 ++++++++---- weboob/capabilities/lyrics.py | 7 +++-- 6 files changed, 67 insertions(+), 19 deletions(-) diff --git a/modules/seeklyrics/backend.py b/modules/seeklyrics/backend.py index 58f200a1..054f186e 100644 --- a/modules/seeklyrics/backend.py +++ b/modules/seeklyrics/backend.py @@ -42,5 +42,5 @@ class SeeklyricsBackend(BaseBackend, ICapLyrics): def get_lyrics(self, id): return self.browser.get_lyrics(id) - def iter_lyrics(self, pattern): - return self.browser.iter_lyrics(quote_plus(pattern.encode('iso-8859-1'))) + def iter_lyrics(self, criteria, pattern): + return self.browser.iter_lyrics(criteria,quote_plus(pattern.encode('iso-8859-1'))) diff --git a/modules/seeklyrics/browser.py b/modules/seeklyrics/browser.py index 8fec62e6..224f8a43 100644 --- a/modules/seeklyrics/browser.py +++ b/modules/seeklyrics/browser.py @@ -20,7 +20,7 @@ from weboob.tools.browser import BaseBrowser -from .pages import ResultsPage, SonglyricsPage +from .pages import SongResultsPage, SonglyricsPage, ArtistResultsPage, ArtistSongsPage __all__ = ['SeeklyricsBrowser'] @@ -32,13 +32,19 @@ class SeeklyricsBrowser(BaseBrowser): ENCODING = 'iso-8859-1' USER_AGENT = BaseBrowser.USER_AGENTS['wget'] PAGES = { - 'http://www.seeklyrics.com/search.php.*': ResultsPage, - 'http://www.seeklyrics.com/lyrics/.*': SonglyricsPage, + 'http://www.seeklyrics.com/search.php.*t=1': SongResultsPage, + 'http://www.seeklyrics.com/search.php.*t=2': ArtistResultsPage, + 'http://www.seeklyrics.com/lyrics/.*html': SonglyricsPage, + 'http://www.seeklyrics.com/lyrics/.*/': ArtistSongsPage, } - def iter_lyrics(self, pattern): - self.location('http://www.seeklyrics.com/search.php?q=%s&t=1' % pattern.encode('utf-8')) - assert self.is_on_page(ResultsPage) + def iter_lyrics(self, criteria, pattern): + if criteria == 'artist': + type = 2 + else: + type = 1 + self.location('http://www.seeklyrics.com/search.php?q=%s&t=%s' % (pattern,type)) + assert self.is_on_page(ArtistResultsPage) or self.is_on_page(SongResultsPage) return self.page.iter_lyrics() def get_lyrics(self, id): diff --git a/modules/seeklyrics/pages.py b/modules/seeklyrics/pages.py index 371ec775..6856c68a 100644 --- a/modules/seeklyrics/pages.py +++ b/modules/seeklyrics/pages.py @@ -23,10 +23,34 @@ from weboob.capabilities.base import NotAvailable, NotLoaded from weboob.tools.browser import BasePage -__all__ = ['ResultsPage','SonglyricsPage'] +__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage'] -class ResultsPage(BasePage): +class ArtistResultsPage(BasePage): + def iter_lyrics(self): + for link in self.parser.select(self.document.getroot(),'table[title~=Results] a.tlink'): + artist = unicode(link.text_content()) + self.browser.location('http://www.seeklyrics.com%s'%link.attrib.get('href','')) + assert self.browser.is_on_page(ArtistSongsPage) + for lyr in self.browser.page.iter_lyrics(artist): + yield lyr + + +class ArtistSongsPage(BasePage): + def iter_lyrics(self,artist): + for th in self.parser.select(self.document.getroot(),'th.text'): + txt = th.text_content() + if txt.startswith('Top') and txt.endswith('Lyrics'): + for link in self.parser.select(th.getparent().getparent(),'a.tlink'): + title = unicode(link.attrib.get('title','').replace(' Lyrics','')) + id = link.attrib.get('href','').replace('/lyrics/','').replace('.html','') + songlyrics = SongLyrics(id, title) + songlyrics.artist = artist + songlyrics.content = NotLoaded + yield songlyrics + + +class SongResultsPage(BasePage): def iter_lyrics(self): first = True for tr in self.parser.select(self.document.getroot(),'table[title~=Results] tr'): diff --git a/modules/seeklyrics/test.py b/modules/seeklyrics/test.py index d54f1e36..d8d656b6 100644 --- a/modules/seeklyrics/test.py +++ b/modules/seeklyrics/test.py @@ -23,8 +23,8 @@ from weboob.capabilities.base import NotLoaded class SeeklyricsTest(BackendTest): BACKEND = 'seeklyrics' - def test_search_n_get(self): - l_lyrics = list(self.backend.iter_lyrics('Complainte')) + def test_search_song_n_get(self): + l_lyrics = list(self.backend.iter_lyrics('song','Complainte')) for songlyrics in l_lyrics: assert songlyrics.id assert songlyrics.title @@ -36,3 +36,11 @@ class SeeklyricsTest(BackendTest): assert full_lyr.artist assert full_lyr.content is not NotLoaded + + def test_search_artist(self): + l_lyrics = list(self.backend.iter_lyrics('artist','boris vian')) + for songlyrics in l_lyrics: + assert songlyrics.id + assert songlyrics.title + assert songlyrics.artist + assert songlyrics.content is NotLoaded diff --git a/weboob/applications/booblyrics/booblyrics.py b/weboob/applications/booblyrics/booblyrics.py index e8f14df2..48f9eac0 100644 --- a/weboob/applications/booblyrics/booblyrics.py +++ b/weboob/applications/booblyrics/booblyrics.py @@ -69,6 +69,7 @@ class Booblyrics(ReplApplication): COMMANDS_FORMATTERS = {'search': 'lyrics_list', 'get': 'lyrics_get', } + SEARCH_CRITERIAS = ['artist','song'] def complete_get(self, text, line, *ignored): args = line.split(' ') @@ -99,17 +100,23 @@ class Booblyrics(ReplApplication): self.format(songlyrics) self.flush() - def do_search(self, pattern): - """ - search [PATTERN] + def complete_search(self, text, line, *ignored): + args = line.split(' ') + if len(args) == 2: + return self.SEARCH_CRITERIAS - Search lyrics. + def do_search(self, line): """ + search [artist | song] [PATTERN] + + Search lyrics by artist name or by song title. + """ + criteria, pattern = self.parse_command_args(line, 2, 1) self.change_path([u'search']) if not pattern: pattern = None self.start_format(pattern=pattern) - for backend, songlyrics in self.do('iter_lyrics', pattern=pattern): + for backend, songlyrics in self.do('iter_lyrics', criteria, pattern): self.cached_format(songlyrics) self.flush() diff --git a/weboob/capabilities/lyrics.py b/weboob/capabilities/lyrics.py index 21fe2dc8..8f8d80b0 100644 --- a/weboob/capabilities/lyrics.py +++ b/weboob/capabilities/lyrics.py @@ -41,10 +41,13 @@ class ICapLyrics(IBaseCap): """ Lyrics websites. """ - def iter_lyrics(self, pattern): + def iter_lyrics(self, criteria, pattern): """ - Search lyrics and iterate on results. + Search lyrics by artist or by song + and iterate on results. + :param criteria: 'artist' or 'song' + :type criteria: str :param pattern: pattern to search :type pattern: str :rtype: iter[:class:`SongLyrics`]