# -*- coding: utf-8 -*-"""@author: amtsing"""''' Google翻譯'''import execjsclass Py4Js(): def __init__(self): self.ctx = execjs.compile(""" function TL(a) { var k = ""; var b = 406644; var b1 = 3293161072; var jd = "."; var $b = "+-a^+6"; var Zb = "+-3^+b+-f"; for (var e = [], f = 0, g = 0; g < a.length; g++) { var m = a.charCodeAt(g); 128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512)
&& g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 +
((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128) } a = b; for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b); a = RL(a, Zb); a ^= b1 || 0; 0 > a && (a = (a & 2147483647) + 2147483648); a %= 1E6; return a.toString() + jd + (a ^ b) }; function RL(a, b) { var t = "a"; var Yb = "+"; for (var c = 0; c < b.length - 2; c += 3) { var d = b.charAt(c + 2), d = d >= t ? d.charCodeAt(0) - 87 : Number(d), d = b.charAt(c + 1) == Yb ? a >>> d: a << d; a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d } return a } """) def getTk(self, text): return self.ctx.call("TL", text)import urllib.request, urllib.parsedef open_url(url): '''開啟網頁連結''' headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64;rv:23.0)Gecko/20100101
Firefox/23.0'} req = urllib.request.Request(url=url, headers=headers) response = urllib.request.urlopen(req) data = response.read().decode('utf-8') return datadef translate(content): '''定義翻譯函數''' if len(content) > 4891: print("翻譯的長度超過限制。。。") return # 擷取tk值 if len(content) > 4891: print("翻譯的長度超過限制。。。") return js = Py4Js() tk = js.getTk(content) # 對輸入內容編碼 content = urllib.parse.quote(content) url = "http://translate.google.cn/translate_a/single?client=t&sl=en&tl=zh-CN&hl=zh-CN&dt
=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&
source=bh&otf=1&ssel=0&tsel=0&kc=1&tk=%s&q=%s" % (tk, content)
# 傳回值是一個多層嵌套列表的字串形式,解析起來還相當費勁,寫了幾個正則,發現也很不理想, # 後來感覺,使用正則簡直就是把簡單的事情複雜化,這裡直接切片就Ok了 result = open_url(url) end = result.find("\",") if end > 4: print(' '+ result[4:end]) output = (' '+ result[4:end]) return str(output)''' 識別驗證碼'''import osimport requestsfrom PIL import Image,ImageGrabimport pytesseractfrom collections import Counter,OrderedDict# def downimg(url):# '''下載圖片'''# with open ('verifycodepage.jpg','wb') as f:# s = requests.Session()# response = s.get(url)# f.write(response.content)def acumulate_colors(image): '''對色彩像素進行統計''' img = Image.open(image) pixdata = img.load() # c = Counter() # print(pixdata) colors = {} for y in range(img.size[1]): for x in range(img.size[0]): #print(pixdata[x, y]) # c.update(pixdata[x, y]) if pixdata[x, y] in colors: colors[pixdata[x, y]] += 1 else: colors[pixdata[x, y]] = 1 colors = sorted(colors.items(),key=lambda d:d[1],reverse=True) # c = OrderedDict(c) # print (c.values()) print(colors[2][0]) return colors# def gray():# '''灰階化'''# img = Image.open('verifycodepage.jpg')# img.convert('L').save('灰色圖.jpg')''' convert() 是映像執行個體對象的一個方法,接受一個 mode 參數,用以指定一種色彩模式,mode 的取值可以是如下幾種:· 1 (1-bit pixels, black and white, stored with one pixel per byte)· L (8-bit pixels, black and white)· P (8-bit pixels, mapped to any other mode using a colour palette)· RGB (3x8-bit pixels, true colour)· RGBA (4x8-bit pixels, true colour with transparency mask)· CMYK (4x8-bit pixels, colour separation)· YCbCr (3x8-bit pixels, colour video format)· I (32-bit signed integer pixels)· F (32-bit floating point pixels)怎麼樣,夠豐富吧。其實如此之處,PIL 還有限制地支援以下幾種比較少見的色彩模式:LA (L with alpha), RGBX (true colour with padding) and RGBa。'''def binary(image): '''二值化''' img = Image.open(image) pixdata = img.load() for y in range(img.size[1]): for x in range(img.size[0]): # if pixdata[x, y] != colors[0][0]: # pixdata[x, y] = (255,255,255) # else: # pixdata[x, y] = (0,0,0) if pixdata[x, y][0] < 115: pixdata[x, y] = (0, 0, 0)# 黑色 for y in range(img.size[1]):