# -*- coding: utf-8 -*-
"""Log in to the GuoPei teacher-training site, solving the captcha with OCR.

Steps performed by ``main()``:

1. Install a global urllib2 opener that stores cookies in a cookie jar.
2. Load the login form and read its ``__VIEWSTATE`` hidden field.
3. Download the captcha image and recognise it with pytesser.
4. POST the login form (dumping the response to ``rel_ip.txt``).
5. Request a page that is only served to logged-in users, dump it to
   ``out_ip.txt`` and look for a marker string to decide success.

The script targets Python 2 (urllib2, cookielib, pytesser).  The standard
library imports fall back to their Python 3 names only so that the pure
helper functions can be imported and unit-tested there; the OCR step still
needs the third-party PIL and pytesser packages.
"""
import io
import os
import re
import sys

try:  # Python 2 names - the interpreter this script was written for.
    import cookielib
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3 names, so the pure helpers stay importable.
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import urlencode

HOST = "zhuzhou2013.feixuelixm.teacher.com.cn"
BASE_URL = "http://" + HOST
# The login form; also the URL the form is POSTed back to.
LOGIN_URL = BASE_URL + "/GuoPeiAdmin/Login/Login.aspx"
# Serves the captcha image (and sets the session cookies while doing so).
CAPTCHA_URL = BASE_URL + "/GuoPeiAdmin/Login/ImageLog.aspx"
# Only reachable after a successful login, so it doubles as the login check.
TEST_URL = BASE_URL + "/GuoPeiAdmin/Login/LoginedPage.aspx"

# NOTE(review): credentials were hard-coded in the script.  They are kept as
# defaults so existing invocations keep working, but they should be supplied
# through the environment and removed from source control.
USERNAME = os.environ.get("GUOPEI_USERNAME", "430223198809308045")
PASSWORD = os.environ.get("GUOPEI_PASSWORD", "56961888")

# Text expected in TEST_URL's page only when the login worked.
# NOTE(review): the original comment speaks of "two characters", so this was
# probably a Chinese greeting before the file was translated - confirm
# against the real page.  Prefer an ASCII marker (an element id or name); a
# non-ASCII marker must be a unicode literal on Python 2.
SUCCESS_MARKER = "Hello"

# Used only when __VIEWSTATE cannot be read from the live login form.
# NOTE(review): this is the literal from the original script with embedded
# whitespace removed.  It looks case-flattened (base64 is case-sensitive),
# so the server will most likely reject it - treat it as a last resort.
FALLBACK_VIEWSTATE = (
    "/wepdwukltcymzeymty2nw8wah4ltg9naw5lzfbhz2ufeexvz2luzwrqywdl"
    "lmfzchgwamypzbyczg8pzbyghgv0axrszqug55so5oi35zcnl+"
    "wtpus5ooeggs/"
    "Ouqvku73or4hlj7ceb29uzm9jdxmfegnozwnrsw5wdxqodghpcykebm9uymx1"
    "cguncmvzdg9yzsh0aglzkwqyaquex19db250cm9sc1jlcxvpcmvqb3n0qmfj"
    "A0tlev9ffgefc0ltz2j0bkxvz2luckjjpnhruswhtput33uj1dbukvw="
)


def install_cookie_opener(debuglevel=1):
    """Install a global urllib2 opener that records cookies.

    After this call every ``urllib2.urlopen`` shares one cookie jar, so the
    session started by the first request is reused by the later ones.

    Args:
        debuglevel: passed to the HTTP and HTTPS handlers; non-zero makes
            them print the raw request/response exchange.

    Returns:
        The cookie jar bound to the installed opener.
    """
    cookiejar = cookielib.MozillaCookieJar()
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cookiejar),
        # Both schemes get a debug handler; the site itself is plain HTTP.
        urllib2.HTTPHandler(debuglevel=debuglevel),
        urllib2.HTTPSHandler(debuglevel=debuglevel),
    )
    urllib2.install_opener(opener)
    return cookiejar


def extract_hidden_field(html, name):
    """Return the ``value`` of the ``<input>`` called *name*, or None.

    Only matches tags that use double-quoted attributes with ``name``
    appearing before ``value``; anything else yields None.
    """
    pattern = (
        r'<input\b[^>]*\bname="%s"[^>]*\bvalue="([^"]*)"' % re.escape(name)
    )
    found = re.search(pattern, html)
    return found.group(1) if found else None


def build_cookie_header(cookies):
    """Join cookies (objects with ``name`` and ``value``) for a Cookie header.

    Returns an empty string when there are no cookies.
    """
    return "; ".join("%s=%s" % (item.name, item.value) for item in cookies)


def build_post_data(username, password, captcha_text, viewstate):
    """Return the login form fields as an ordered list of (name, value).

    NOTE(review): the casing of txtPassword, txtCode and ClientScreenWidth
    was lost in the original file and is restored here by analogy with
    txtUserName - confirm against the form's HTML.
    """
    return [
        ("__EVENTTARGET", ""),
        ("__EVENTARGUMENT", ""),
        ("__VIEWSTATE", viewstate),
        ("txtUserName", username),
        ("txtPassword", password),
        ("txtCode", captcha_text),
        # Click coordinates of the image submit button.
        ("ImgbtnLogin.x", 44),
        ("ImgbtnLogin.y", 14),
        ("ClientScreenWidth", 1180),
    ]


def build_headers(cookie_header):
    """Return the request headers for the login POST.

    Accept-Encoding is deliberately not sent: urllib2 does not decompress
    responses, so asking for gzip would make the body undecodable.  The
    Cookie header is omitted when *cookie_header* is empty.
    """
    headers = {
        "Accept": (
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        ),
        "Accept-Language": "zh-cn,en-us;q=0.8,zh;q=0.5,en;q=0.3",
        "Host": HOST,
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 5.1; rv:29.0) "
            "Gecko/20100101 Firefox/29.0"
        ),
        "Referer": LOGIN_URL,
        "Connection": "keep-alive",
    }
    if cookie_header:
        headers["Cookie"] = cookie_header
    return headers


def login_succeeded(page_text, marker=SUCCESS_MARKER):
    """Return True when *marker* occurs in *page_text*."""
    return marker in page_text


def recognize_captcha(image_bytes):
    """Run OCR on raw image bytes and return the recognised text.

    Raises:
        ImportError: if PIL or pytesser is not installed.
    """
    # Imported here rather than at module level: both packages are
    # third-party, and keeping them local lets the rest of the module be
    # imported without them.
    try:
        from PIL import Image
    except ImportError:  # old PIL installs expose a top-level module
        import Image
    from pytesser import image_to_string

    picture = Image.open(io.BytesIO(image_bytes))
    # Strip surrounding whitespace so stray newlines from the OCR output are
    # not posted as part of the code.
    return image_to_string(picture).strip()


def fetch_viewstate():
    """Read __VIEWSTATE from the live login form, else use the fallback."""
    try:
        response = urllib2.urlopen(LOGIN_URL)
    except urllib2.URLError as err:  # HTTPError is a subclass of URLError
        print("Could not load the login form: %s" % err)
        return FALLBACK_VIEWSTATE
    try:
        html = response.read().decode("utf-8", "replace")
    finally:
        response.close()
    return extract_hidden_field(html, "__VIEWSTATE") or FALLBACK_VIEWSTATE


def fetch_captcha_text():
    """Download the captcha image and return its OCR'd text."""
    response = urllib2.urlopen(CAPTCHA_URL)
    try:
        image_bytes = response.read()
    finally:
        response.close()
    return recognize_captcha(image_bytes)


def fetch_page(request, dump_path, show_response=False):
    """Open *request*, save the decoded body to *dump_path*, return it.

    Args:
        request: a URL string or a ``urllib2.Request``.
        dump_path: file that receives the body (UTF-8) for troubleshooting.
        show_response: when true, print the status code and headers.

    Raises:
        urllib2.HTTPError: if the server answers with an error status.
    """
    response = urllib2.urlopen(request)
    try:
        if show_response:
            print(response.getcode())
            print(response.info())
        # "replace" keeps the dump usable even if the page is not UTF-8.
        text = response.read().decode("utf-8", "replace")
    finally:
        response.close()
    with io.open(dump_path, "w", encoding="utf-8") as dump:
        dump.write(text + "\n")
    return text


def main():
    """Run the login flow; return 0 on success and 1 on failure."""
    cookiejar = install_cookie_opener()

    # Same order as a browser: load the form first, then its captcha, so
    # both requests belong to one cookie session.
    viewstate = fetch_viewstate()
    captcha_text = fetch_captcha_text()
    print("Vrifycode: %s" % captcha_text)

    cookie_header = build_cookie_header(cookiejar)
    print("###########################")
    print("Cookies: %s" % cookie_header)

    form = build_post_data(USERNAME, PASSWORD, captcha_text, viewstate)
    encoded_form = urlencode(form)
    # NOTE(review): this debug output includes the password in clear text.
    print("data:###############")
    print(encoded_form)

    login_request = urllib2.Request(
        LOGIN_URL, encoded_form.encode("ascii"), build_headers(cookie_header)
    )
    try:
        fetch_page(login_request, "rel_ip.txt", show_response=True)
    except urllib2.HTTPError as err:
        # Without a response there is nothing further to inspect.
        print(err.code)
        return 1

    try:
        members_page = fetch_page(TEST_URL, "out_ip.txt")
    except urllib2.HTTPError as err:
        print(err.code)
        return 1

    if login_succeeded(members_page):
        print("Logged in successfully!")
        return 0
    print("Logged in failed, check out_ip.txt file for details")
    return 1


if __name__ == "__main__":
    sys.exit(main())