Python version: 3.4.3
Required libraries: PIL (Pillow), selenium, tesseract
First, the code:
#coding: Utf-8
import os
import subprocess
import sys
import time

from PIL import Image
from PIL import ImageOps
from selenium import webdriver
def cleanimage(ImagePath, threshold=143, border=20):
    """Binarise the image at ``ImagePath`` in place and pad it with white.

    The image is reduced to pure black/white pixels, surrounded by a white
    margin, and written back to the same path it was read from.

    Args:
        ImagePath: Path of the image file to read and then overwrite.
        threshold: Grayscale values below this become black (0); all other
            values become white (255).
        border: Width in pixels of the white margin added on every side.
    """
    with Image.open(ImagePath) as source:
        # Collapse to a single grayscale band before thresholding: applying
        # the threshold to each colour band separately leaves coloured
        # pixels instead of a strictly black-or-white picture.
        gray = source.convert("L")
    # Make every pixel either black or white.
    binary = gray.point(lambda x: 0 if x < threshold else 255)
    bordered = ImageOps.expand(binary, border=border, fill="white")
    bordered.save(ImagePath)
def getauthcode(Driver, url="http://localhost/"):
    """Screenshot the CAPTCHA page and OCR it with tesseract.

    The browser is pointed at the CAPTCHA URL, the page is saved as a
    screenshot, the screenshot is cleaned with ``cleanimage`` and then handed
    to the ``tesseract`` command-line tool, whose text output is read back.

    Args:
        Driver: Browser driver object; it must provide ``get`` and
            ``save_screenshot``.
        url: Base URL of the site; the CAPTCHA path is appended to it.

    Returns:
        The recognised text with all whitespace removed when it is exactly
        four characters long, otherwise ``False``.
    """
    # One spelling for each file name: the screenshot, the file passed to
    # tesseract and the text file read back must all refer to the same files,
    # including on case-sensitive file systems.
    image_file = "captcha.jpg"
    output_base = "captcha"

    # NOTE(review): the casing of this path is kept exactly as it appears in
    # the listing; confirm it against the real server route.
    captcha_url = url + "Common/random"
    Driver.get(captcha_url)
    time.sleep(0.5)
    Driver.save_screenshot(image_file)  # take a screenshot and save the picture
    time.sleep(0.5)
    cleanimage(image_file)

    ocr = subprocess.Popen(
        ["tesseract", image_file, output_base],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # communicate() drains both pipes while waiting; a bare wait() can block
    # forever once the child has filled a pipe buffer.
    ocr.communicate()

    with open(output_base + ".txt", "r") as result_file:
        raw_text = result_file.read()
    # Clean any whitespace characters (spaces, newlines, form feeds, ...).
    captcha_response = "".join(raw_text.split())
    print("Captcha solution attempt:" + captcha_response)
    if len(captcha_response) == 4:
        return captcha_response
    return False
def _fill_field(driver, xpath, value):
    """Clear the input element located by ``xpath`` and type ``value`` into it."""
    field = driver.find_element("xpath", xpath)
    field.clear()
    field.send_keys(value)


def withoutcookielogin(url="http://org.cfu666.com/"):
    """Log in through the web form, retrying until a CAPTCHA guess succeeds.

    A Chrome window is opened on ``url``. On every attempt the CAPTCHA is
    read with ``getauthcode``, the browser steps back to the login page, the
    form is filled in and submitted, and the side-menu link is clicked as the
    check that the login went through.

    Args:
        url: Address of the login page; also passed to ``getauthcode`` as the
            base URL.

    Returns:
        The Chrome driver, once the side-menu link has been clicked.
    """
    # NOTE(review): the casing of the id/name values below ('Orgcode',
    # 'Authcode') is taken verbatim from the listing; XPath comparisons are
    # case-sensitive, so confirm them against the real page markup.
    driver = webdriver.Chrome()
    driver.maximize_window()
    driver.get(url)
    while True:
        authcode = getauthcode(driver, url)
        if not authcode:
            # OCR did not produce a four-character code; fetch a new CAPTCHA.
            continue

        # getauthcode navigated to the CAPTCHA page; step back to the form.
        driver.back()
        _fill_field(
            driver, "//input[@id='Orgcode' and @name='Orgcode']", "Orgcode"
        )
        _fill_field(
            driver, "//input[@id='account' and @name='username']", "username"
        )
        _fill_field(
            driver, "//input[@type='password' and @name='password']", "password"
        )
        driver.find_element(
            "xpath", "//input[@type='text' and @name='Authcode']"
        ).send_keys(authcode)
        driver.find_element("xpath", "//button[@type='submit']").click()

        try:
            time.sleep(3)
            # This link is only looked for after submitting; failing to find
            # or click it is treated as a rejected login.
            driver.find_element(
                "xpath", "//*[@id='side-menu']/li[2]/ul/li/a"
            ).click()
        except Exception:
            print("Authcode Error:", authcode)
            driver.refresh()
        else:
            return driver
# Log in against the local service, then open the enterprise "add" page with
# the authenticated browser session.
Driver = withoutcookielogin("http://localhost/")
Driver.get("http://localhost/enterprise/add/")
How to get the verification code we need
While working out how to obtain the verification code, I fell into many pits and read many articles. Most of them teach you how to recognize a verification code, but none explain how to get hold of the verification code image that your current session actually needs.
My way of handling this is:
1. Use selenium to open the login page you need (URL1).
2. Find the address of the verification code image (URL2) by inspecting the element (the simplest way is to right-click the image and open it in a new page).
3. From the URL1 page, navigate to URL2, then take a screenshot to save the verification code page.
4. Process the screenshot to get the CAPTCHA string, then use the browser's Back button to return to the URL1 login page.
5. Enter the required login information and the verification code.
6. Click Sign In.
7. Check the page after login to determine whether it succeeded; if not, repeat steps 1-7.
To protect my company's information, the page used here is served from my local machine. I also tested this way of obtaining the verification code on the Bole Online registration page, and it works. (This processing method only handles verification codes whose background noise is made of pixel dots; if the verification code has horizontal lines across it, extra processing is needed.)
This is my first blog post; if you don't like it, please don't flame.
References for the verification code processing method:
Web Scraping with Python.pdf
Python+selenium Identification Code and login