#Coding:utf-8" "Created on October 9, 2017 @author:li.liu" " fromSeleniumImportWebdriver fromlxmlImportetreeImportUrllibImportUrllib2Import Time#url= ' http://www.woyihome.com 'Url='Http://sso.woyihome.com/sso/pc-login'#url= ' http://www.baidu.com 'User_agent='mozilla/5.0 (Windows NT 6.1; Win64; x64) applewebkit/537.36 (khtml, like Gecko) chrome/60.0.3112.90 safari/537.36'Values= {'name':' Why', ' Location':'SDU', 'language':'Python'} headers= {'user-agent': user_agent} data=Urllib.urlencode (values) Req=Urllib2. Request (URL, data, headers) Response=Urllib2.urlopen (req) HTML1= Response.read (). Encode ('Utf-8')deftest1 (): X1={} #html1=urllib.urlopen (URL). read (). Decode (' Utf-8 ') #Print HTML1Hxml=etree. HTML (HTML1)#Print HxmlHtree=etree. ElementTree (Hxml)#Print HtreeId_dite=htree.xpath ('//*[@id]') #Print Id_ditecoun=0 forId_itemsinchId_dite:#print Id_items.items () #print Htree.getpath (id_items) forId_iteminchId_items.items ():#Print Id_item ifid_item[0]=='ID': Id_str='//*[@id = "'+id_item[1]+'"]'X1[id_str]=[] #Print Id_strId_path=Htree.getpath (Htree.xpath (ID_STR) [0])#Print Id_pathid_str1=id_str+'//*'idelem_list=Htree.xpath (ID_STR1)#Print Idelem_list forEinchidelem_list:ifLen (E.items ()) = =0:Pass Else: E_path=Htree.getpath (e)#Print E_pathe_path1=e_path.split (Id_path)#Print E_path1[1] ifLen (e_path1) >1: E_str=id_str+e_path1[1] E_list=e_str.split ('/') if 'Li' inchE_list[len (E_list)-1]or 'ul' inchE_list[len (E_list)-1]or 'span' inchE_list[len (e_list)-1]: Pass Else: #Print E_strCoun+=1x1[id_str].append (E_STR)" "For i in X1: #print i for I1 in X1[i]: print I1" "a=0 B=0 Driver=Webdriver. Chrome () driver.get (URL)#Print Driver.title forIinchX1:#Print I forI1inchX1[i]:#Print I1 Try: D=Driver.find_element_by_xpath (i1) a+=1PrintD.text Time.sleep (2) Driver.find_element_by_xpath (I1). 
Click () HEADX=Driver.window_handles#Print HEADX Print 'current page address: \ n', Driver.current_url time.sleep (1) PrintI'\ n' ifLen (HEADX)!=1: Driver.switch_to_window (headx[1]) Durl=Driver.current_urlPrint 'current page address: \ n', Durl,'\ n' if 'Woyihome' inchdurl:driver.close () Driver.switch_to_window (headx[0]) Else: K=1 Break elif 'localhost' inchDriver.current_url:Printaexcept : Pass #Print B Printa#driver.quit () #print ' ==================================================== ' PrintCoun Test1 ()
# Iterating over HTML elements by XPath in Python with lxml