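While reading the web-scraping chapter recently, I came across the expression s_urls[0]['href'] and could not understand it: I wondered whether Python had arrays with non-numeric subscripts. After several searches I learned that this is BeautifulSoup's tag attribute lookup: s_urls[0] is a Tag object, and indexing a Tag with a string such as 'href' returns the value of that HTML attribute, much like a dictionary lookup.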
https://stackoverflow.com/questions/5815747/beautifulsoup-getting-href?noredirect=1&lq=1
"""Scrape the top Yelp restaurant listings for a city, one thread per page.

The script fetches the Yelp search-results page, collects the link of every
result (``.biz-name`` anchors), and then downloads each restaurant page in
its own thread to print its title, street address and phone number.
"""
import urllib.parse
import urllib.request
# Thread runs a callable concurrently in a separate thread of control.
from threading import Thread

from bs4 import BeautifulSoup

# Location of restaurants
home_url = "https://www.yelp.com"
find_what = "restaurants"
location = "London"

# Get all restaurants that match the search criteria, e.g.
# https://www.yelp.com/search?find_desc=restaurants&find_loc=london
# urlencode escapes the values, so search terms containing spaces or '&'
# still produce a valid query string.
search_url = home_url + "/search?" + urllib.parse.urlencode(
    {"find_desc": find_what, "find_loc": location}
)
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(search_url) as response:
    s_html = response.read()
print("Here")
soups_s = BeautifulSoup(s_html, "lxml")

# Get URLs of top restaurants in London.
# select() returns a list of Tag objects; indexing a Tag with a string
# (tag['href']) returns the value of that HTML attribute.
s_urls = soups_s.select('.biz-name')
print(len(s_urls))
print(s_urls)
print(type(s_urls))
if s_urls:
    # Only inspect the first result when the search matched something;
    # indexing an empty result list would raise IndexError.
    print(type(s_urls[0]))
    print(s_urls[0])
    print(s_urls[0]['href'])

# hrefs are site-relative, so prefix them with the site root, e.g.
# https://www.yelp.com/biz/duck-and-waffle-london-3?osq=restaurants
url = [home_url + link['href'] for link in s_urls]
print(url)


def scrape(ur):
    """Download one restaurant page and print its details.

    Args:
        ur: Absolute URL of the restaurant page to fetch.

    Prints the title, street address and phone number when the matching
    element is present on the page, followed by a separator line. Returns
    nothing. Network errors raised by ``urlopen`` propagate to the caller.
    """
    with urllib.request.urlopen(ur) as page:
        html = page.read()
    soup = BeautifulSoup(html, "lxml")
    title = soup.select('.biz-page-title')
    saddress = soup.select('.street-address')
    phone = soup.select('.biz-phone')

    # NOTE: several threads call print() concurrently, so lines belonging
    # to different restaurants may interleave in the output.
    if title:
        print("title:", title[0].get_text().strip())
    if saddress:
        print("Streeet Address:", saddress[0].get_text().strip())
    if phone:
        print("Phone number:", phone[0].get_text().strip())
    print("---------------------")


# Make one thread per restaurant page; the work is I/O-bound, so the
# downloads overlap while each thread waits on the network.
threadlist = []
for page_url in url:
    t = Thread(target=scrape, args=(page_url,))
    t.start()
    threadlist.append(t)

# Wait for every scraping thread to finish before the script exits.
for t in threadlist:
    t.join()
Python BS4 get href URL