# Call the Google Translate API to translate text.
#!/usr/bin/python
# coding: UTF-8
import sys

# Python 2 only: reload(sys) makes sys.setdefaultencoding() available again
# so that implicit str/unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf8')

from xml.etree import ElementTree as ET  # module for parsing XML files
import re
import urllib  # modules for talking to the network API
import urllib2
from socket import error as SocketError
import errno
import os
import string

# This address is critical: the 'http://translate.google.cn/translate_t'
# URL quoted in many places on the web produced no output.
_TRANSLATE_URL = 'http://translate.google.cn'

# User-Agent sent with the request so that it looks like a browser.
_BROWSER_USER_AGENT = ('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; '
                       'SV1; .NET CLR 2.0.50727)')

# The translated text is whatever follows the 'translated_text=' marker in
# the returned page.  (?<=...) is a lookbehind: it matches only when the
# current position is preceded by the marker, and '.*?' is the non-greedy
# (minimal) form, so the match stops at the first ';'.  The marker is matched
# case-insensitively because its exact casing in the page is not certain.
_TRANSLATED_TEXT_RE = re.compile(r"(?<=translated_text=).*?;", re.IGNORECASE)


def translate(text, f, t):
    """Translate ``text`` by posting it to the Google Translate home page.

    Simulates a browser: sends the form data to the Google Translate page
    and then scrapes the translation result out of the returned HTML.

    Args:
        text: The sentence to translate.
        f: Source language code; sent as the left side of ``langpair``
            (e.g. ``'en'``).
        t: Target language code; sent as the right side of ``langpair``
            (e.g. ``'zh-CN'`` for Simplified Chinese).

    Returns:
        The text found between the ``translated_text=`` marker and the next
        ``';'`` in the response, with leading/trailing ``';'`` stripped.

    Raises:
        ValueError: If the response page does not contain the marker.
        urllib2.URLError: If the request itself fails (raised by urllib2).
    """
    values = {
        'hl': 'zh-cn',
        'ie': 'UTF-8',
        'text': text,
        'langpair': '%s|%s' % (f, t),
    }
    data = urllib.urlencode(values)
    req = urllib2.Request(_TRANSLATE_URL, data)
    req.add_header('User-Agent', _BROWSER_USER_AGENT)

    # Send the request to Google Translate and read the returned page.
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        response.close()

    # Filter the translated text out of the returned page.
    match = _TRANSLATED_TEXT_RE.search(html)
    if match is None:
        raise ValueError(
            "no 'translated_text=' marker found in the response from %s"
            % _TRANSLATE_URL)
    return match.group(0).strip(';')