"""Crawl NetEase Cloud Music playlists and songs with Selenium-driven Chrome.

Two threads each start from one seed playlist in ``url_list``.  Every
playlist page is opened in Chrome; playlists whose play count exceeds
``MIN_COUNT`` are appended to ``D:\\songlist.txt``.  Each song linked from the
page is opened in its own Chrome session and, when its comment count exceeds
``MIN_COUNT``, appended to ``D:\\song.txt``.  Playlists linked from the page
are then crawled the same way (recursively).

NOTE: this uses the Selenium 3 element-lookup API (``find_element_by_*``,
``switch_to_frame``, positional driver path), exactly as the original did.
"""
import threading
from collections import deque

from selenium import webdriver

# Raw string on purpose: in a normal literal the "\a" of "\application" is
# the BEL escape character, which silently corrupts the path.
CHROMEDRIVER_PATH = r'C:\Program Files\google\chrome\application\chromedriver.exe'

# A playlist / song is recorded only when its counter is above this value.
MIN_COUNT = 10000

# Element ids and frame names are case-sensitive.  The lower-case spellings
# below are the ones the site is believed to use -- TODO confirm against the
# live page markup.
PLAY_COUNT_ID = 'play-count'
COMMENT_COUNT_ID = 'cnt_comment_count'
CONTENT_FRAME = 'g_iframe'

# URLs already handled, shared by all threads so nothing is crawled twice.
songlist = set()
playlist = set()

url_list = [
    'http://music.163.com/playlist?id=598057191',
    'http://music.163.com/#/playlist?id=144236857',
]


def _new_browser():
    """Start a fresh Chrome session using the configured chromedriver."""
    return webdriver.Chrome(CHROMEDRIVER_PATH)


def _collect_links(browser, selector, message):
    """Return a deque with the ``href`` of every element matching *selector*.

    Each link is printed using *message*, a ``%``-format string taking the
    link text and its address.
    """
    links = deque()
    try:
        elements = browser.find_elements_by_css_selector(selector)
    except Exception as exc:
        print('Someerror', exc)
        return links
    for each in elements:
        try:
            href = each.get_property('href')
            print(message % (each.text, href))
        except Exception:
            # Best effort: an element that can no longer be read is skipped
            # so the remaining links on the page are still collected.
            continue
        links.append(href)
    return links


def save_file(data, file):
    """Append the string *data* to the text file at path *file*."""
    # UTF-8 so that non-ASCII titles never fail on the platform default codec.
    with open(file, 'a', encoding='utf-8') as f_obj:
        f_obj.write(data)


def chrome_browser_songlist(url, browser):
    """Scrape one playlist page, then crawl the songs and playlists it links.

    Args:
        url: address of the playlist page.
        browser: Chrome session used to load the page.  This function takes
            ownership of it and always shuts it down before returning.

    Raises:
        Whatever Selenium raises while loading or reading the page, and
        ``ValueError`` if the play count text is not an integer.
    """
    try:
        browser.get(url)
        play_count = browser.find_element_by_id(PLAY_COUNT_ID).text
        if int(play_count) > MIN_COUNT:
            data = ('\n' + browser.find_element_by_class_name('f-ff2').text
                    + 'Number of comments:' + str(play_count)
                    + 'Address:' + url)
            save_file(data, 'D:\\songlist.txt')

        song_urls = _collect_links(
            browser, r'a[href^=\/song]', "Song name:%s Address%s")
        song_queue(song_urls, browser)

        # Only the href strings are kept, so the page is no longer needed
        # once this returns and the session can be shut down.
        playlist_urls = _collect_links(
            browser, r'a[href^=\/playlist]', "Song list:%s Address%s")
    finally:
        browser.quit()

    # Each linked playlist gets a fresh session, started on demand.
    play_list_queue(playlist_urls, None)


def chrome_browser_song(url):
    """Open one song page in its own Chrome session and record it if popular.

    The song name, singer, comment count and address are appended to
    ``D:\\song.txt`` when the comment count exceeds ``MIN_COUNT``.
    """
    browser = _new_browser()
    try:
        browser.get(url)
        browser.switch_to_frame(CONTENT_FRAME)
        comment_count = browser.find_element_by_id(COMMENT_COUNT_ID).text
        if int(comment_count) > MIN_COUNT:
            data = ('\nSong name:'
                    + browser.find_element_by_class_name('f-ff2').text
                    + 'Singer:'
                    + browser.find_element_by_css_selector(
                        r'a[href^=\/artist]').text
                    + 'Number of comments:' + comment_count
                    + 'Song address:' + url)
            save_file(data, 'D:\\song.txt')
    finally:
        # Always release the session, even when scraping failed.
        browser.quit()


def song_queue(songqueue, browser):
    """Drain *songqueue*, scraping every song URL not seen before.

    Args:
        songqueue: deque of song page URLs; emptied by this call.
        browser: unused; kept so existing callers keep working.
    """
    while songqueue:
        current_url = songqueue.popleft()
        if current_url in songlist:
            continue
        songlist.add(current_url)
        try:
            chrome_browser_song(current_url)
        except Exception as exc:
            # One bad song page must not stop the rest of the queue.
            print('Failed to process song %s: %s' % (current_url, exc))


def play_list_queue(listqueue, browser):
    """Drain *listqueue*, crawling every playlist URL not seen before.

    Args:
        listqueue: deque of playlist page URLs; emptied by this call.
        browser: Chrome session to use for the first playlist, or ``None``
            to start one on demand.  ``chrome_browser_songlist`` shuts down
            the session it is given, so every later playlist gets a new one.
    """
    while listqueue:
        current_url = listqueue.popleft()
        if current_url in playlist:
            continue
        playlist.add(current_url)
        if browser is None:
            browser = _new_browser()
        try:
            chrome_browser_songlist(current_url, browser)
        except Exception as exc:
            # One bad playlist page must not stop the rest of the queue.
            print('Failed to process playlist %s: %s' % (current_url, exc))
        finally:
            # The session was consumed by chrome_browser_songlist.
            browser = None
    if browser is not None:
        # Nothing in the queue needed the session that was handed in.
        browser.quit()


def _crawl_seed(index):
    """Crawl starting from the seed playlist ``url_list[index]``."""
    chrome_browser_songlist(url_list[index], _new_browser())


def thread_1():
    """Thread target: crawl from the first seed playlist."""
    _crawl_seed(0)


def thread_2():
    """Thread target: crawl from the second seed playlist."""
    _crawl_seed(1)


def thread_song():
    """Return the (not yet started) crawler threads, one per seed playlist."""
    return [
        threading.Thread(target=thread_1),
        threading.Thread(target=thread_2),
    ]


if __name__ == '__main__':
    threads = thread_song()
    # Start every thread before waiting on any of them, so the seeds are
    # crawled concurrently rather than one after another.
    for t in threads:
        t.daemon = True
        t.start()
    for t in threads:
        t.join()
# The login problem has not been solved, so this script falls back on a rather clumsy approach.
# Python NetEase Cloud Music crawler