PythonProxy (click to open the link) is an open-source HTTP proxy server. It consists of only a small amount of code, but its implementation is elegant and very well crafted. Below is the code with my comments added. Ownership of the code belongs to the original author. Thank you very much; I have learned a lot!
# -*- coding: utf-8 -*-
# <PythonProxy.py>
#
#Copyright (c) <2009> <Fábio Domingues - fnds3000 in gmail.com>
#
#Permission is hereby granted, free of charge, to any person
#obtaining a copy of this software and associated documentation
#files (the "Software"), to deal in the Software without
#restriction, including without limitation the rights to use,
#copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the
#Software is furnished to do so, subject to the following
#conditions:
#
#The above copyright notice and this permission notice shall be
#included in all copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
#OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
#NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
#WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
#FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
#OTHER DEALINGS IN THE SOFTWARE.

"""\
Copyright (c) <2009> <Fábio Domingues - fnds3000 in gmail.com> <MIT Licence>

                  **************************************
                 *** Python Proxy - A Fast HTTP proxy ***
                  **************************************

At the moment this proxy is an Elite proxy.

Supported HTTP methods:
 - OPTIONS;
 - GET;
 - HEAD;
 - POST;
 - PUT;
 - DELETE;
 - TRACE;
 - CONNECT.

Supports:
 - Client connections over IPv4 or IPv6;
 - Connections to the target over IPv4 and IPv6;
 - Any kind of TCP data transmission (CONNECT tunneling), e.g. SSL
   connections, as is the case with HTTPS.

To do:
 - Check that the input coming from the client is correct;
   - Send the appropriate HTTP errors if it is not, or simply drop the
     connection;
 - Create an error handler;
 - Create an error log file;
 - Add exception handling where errors are foreseeable, e.g. sockets and
   files;
 - Review everything, improve the structure of the program and give the
   variables and methods suitable names;
 - Comment the program properly;
 - Doc strings.

Future features:
 - Add anonymous and transparent proxy modes;
 - Support FTP?

(!) Note: what follows only applies to non-CONNECT connections; for CONNECT
    connections the proxy is always Elite.

What is the difference between an Elite, an Anonymous and a Transparent proxy?
 - An elite proxy is fully anonymous: the server that receives the request
   cannot tell that a proxy exists and does not receive the client's IP
   address;
 - With an anonymous proxy the server knows the client is using a proxy but
   does not know the client's IP address;
   The HTTP header "Proxy-agent" is sent.
 - A transparent proxy gives the server the client's IP address and the
   information that a proxy is being used.
   The HTTP headers "Proxy-agent" and "HTTP_X_FORWARDED_FOR" are sent.
"""

import select
import socket

try:
    import thread  # Python 2
except ImportError:
    import _thread as thread  # the same module under its Python 3 name

__version__ = '0.1.0 Draft 1'
BUFLEN = 8192
VERSION = 'Python Proxy/' + __version__
HTTPVER = 'HTTP/1.1'

# Methods that are forwarded as ordinary HTTP requests (CONNECT is tunneled).
_FORWARDED_METHODS = (b'OPTIONS', b'GET', b'HEAD', b'POST', b'PUT',
                      b'DELETE', b'TRACE')
# Length in seconds of one select() wait inside the relay loop.
_POLL_SECONDS = 3


def _to_text(data):
    """Return *data* as a native ``str``; bytes are decoded as latin-1."""
    if isinstance(data, str):
        return data
    return data.decode('latin-1')


def _to_bytes(text):
    """Return *text* as bytes; text is encoded as latin-1."""
    if isinstance(text, bytes):
        return text
    return text.encode('latin-1')


class ConnectionHandler:
    """Serve exactly one client connection of the proxy.

    All the work happens in ``__init__``: the request line is read, a
    connection to the requested target is opened, data is relayed in both
    directions, and finally both sockets are closed.  One instance is
    created per accepted connection (see ``start_server``).
    """

    def __init__(self, connection, address, timeout):
        """Handle the request arriving on *connection*.

        connection -- connected client socket.
        address    -- client address as returned by ``accept()`` (unused).
        timeout    -- idle time in seconds after which relaying stops.

        Socket errors raised while connecting or relaying propagate to the
        caller; both sockets are closed in every case.
        """
        self.client = connection
        self.client_buffer = b''
        self.timeout = timeout
        # Stays None until a target connection has really been opened, so
        # that the cleanup below never touches a missing attribute.
        self.target = None
        self.method = self.path = self.protocol = None
        try:
            # e.g. [GET, http://host/index.html, HTTP/1.1]
            request = self.get_base_header()
            if len(request) != 3:
                # Client hung up or sent a malformed request line.
                return
            self.method, self.path, self.protocol = request
            if self.method == b'CONNECT':
                self.method_CONNECT()
            elif self.method in _FORWARDED_METHODS:
                self.method_others()
            # Any other method: simply drop the connection.
        finally:
            self.client.close()
            if self.target is not None:
                self.target.close()

    def get_base_header(self):
        """Read the request line and return its whitespace-separated parts.

        Everything received after the first newline is kept in
        ``self.client_buffer``.  Returns an empty list when the client
        closes the connection before a complete line has arrived.
        """
        while 1:
            chunk = self.client.recv(BUFLEN)
            if not chunk:
                # EOF: without this check the loop would spin forever.
                return []
            self.client_buffer += chunk
            end = self.client_buffer.find(b'\n')
            if end != -1:
                break
        print('%s' % _to_text(self.client_buffer[:end]))  # debug
        # split() without arguments splits on any whitespace, which also
        # removes the trailing '\r\n'.
        data = self.client_buffer[:end + 1].split()
        # Keep the remaining content (headers, maybe part of the body).
        self.client_buffer = self.client_buffer[end + 1:]
        return data

    def method_CONNECT(self):
        """Open a tunnel to ``self.path`` ("host:port") and relay raw data."""
        self._connect_target(self.path)
        self.client.sendall(_to_bytes(
            HTTPVER + ' 200 Connection established\r\n' +
            'Proxy-agent: %s\r\n\r\n' % VERSION))
        self.client_buffer = b''
        self._read_write()

    def method_others(self):
        """Forward an ordinary HTTP request to the server named in its URL.

        A browser talking to a proxy sends the absolute URL in the request
        line ("GET http://host/path HTTP/1.1").  The host part selects the
        target; the request line is rewritten to the plain form
        ("GET /path HTTP/1.1") before it is sent on together with the data
        already buffered from the client.
        """
        # http://suggestion.baidu.com/su?wd=w&action=opensearch&ie=utf-8
        #   host ---> suggestion.baidu.com
        #   path ---> /su?wd=w&action=opensearch&ie=utf-8
        host, path = self._split_url(self.path)
        self._connect_target(host)
        request_line = b' '.join([self.method, path, self.protocol]) + b'\r\n'
        self.target.sendall(request_line + self.client_buffer)
        self.client_buffer = b''
        self._read_write()

    @staticmethod
    def _split_url(url):
        """Split a proxy-style URL into ``(host[:port], path)``.

        A leading "http://" (any case) is removed when present.  When the
        URL contains no path, "/" is returned as the path.
        """
        rest = url[7:] if url[:7].lower() == b'http://' else url
        slash = rest.find(b'/')
        if slash == -1:
            return rest, b'/'
        return rest[:slash], rest[slash:]

    @staticmethod
    def _split_host_port(netloc, default_port=80):
        """Split "host", "host:port" or "[ipv6]:port" into ``(host, port)``.

        The host is returned as text and the port as an int; *default_port*
        is used when no port is given.  A non-numeric port raises
        ``ValueError``.
        """
        netloc = _to_text(netloc)
        if netloc.startswith('['):
            closing = netloc.find(']')
            if closing != -1:
                port_text = netloc[closing + 1:].lstrip(':')
                return (netloc[1:closing],
                        int(port_text) if port_text else default_port)
        if netloc.count(':') > 1:
            # Bare IPv6 literal without brackets: it cannot carry a port.
            return netloc, default_port
        host, _, port_text = netloc.partition(':')
        return host, (int(port_text) if port_text else default_port)

    def _connect_target(self, host):
        """Connect ``self.target`` to *host* ("host" or "host:port").

        The port defaults to 80.  Every address the name resolves to is
        tried in turn; ``socket.error`` is raised if none can be reached.
        """
        host, port = self._split_host_port(host)
        self.target = socket.create_connection((host, port))

    def _read_write(self):
        """Relay data between client and target until EOF, error or timeout.

        select() waits at most ``_POLL_SECONDS`` per round, so the relay
        gives up after ``timeout // _POLL_SECONDS`` consecutive idle rounds.
        """
        time_out_max = self.timeout // _POLL_SECONDS
        # client: the browser side; target: the remote server side
        socs = [self.client, self.target]
        count = 0
        while 1:
            count += 1
            # Both sockets are watched for readability and for errors.
            (recv, _, error) = select.select(socs, [], socs, _POLL_SECONDS)
            if error:
                break
            for in_ in recv:
                data = in_.recv(BUFLEN)
                if not data:
                    # Peer closed its side: stop instead of spinning on a
                    # socket that stays readable forever.
                    return
                out = self.target if in_ is self.client else self.client
                # sendall() so that a partial write cannot drop data.
                out.sendall(data)
                # Traffic seen: restart the idle countdown.
                count = 0
            if count >= time_out_max:
                break


def start_server(host='localhost', port=8080, IPv6=False, timeout=60,
                 handler=ConnectionHandler, backlog=0):
    """Accept connections forever, serving each one in its own thread.

    host, port -- address to listen on.
    IPv6       -- listen on an AF_INET6 socket instead of AF_INET.
    timeout    -- idle timeout in seconds handed to every handler.
    handler    -- callable invoked as handler(client_sock, client_addr,
                  timeout) in a new thread; with the default this creates
                  a ConnectionHandler, whose constructor does all the work.
    backlog    -- value passed to listen(); 0 (the historical default)
                  requests the implementation-defined minimum queue.
    """
    soc_type = socket.AF_INET6 if IPv6 else socket.AF_INET
    soc = socket.socket(soc_type)
    try:
        # Allow quick restarts while old connections sit in TIME_WAIT.
        soc.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        soc.bind((host, port))
        print("Serving on %s:%d." % (host, port))  # debug
        soc.listen(backlog)
        while 1:
            # accept() returns (client_sock, client_addr); appending
            # (timeout,) yields the argument tuple for the handler.
            thread.start_new_thread(handler, soc.accept() + (timeout,))
    finally:
        soc.close()


if __name__ == '__main__':
    start_server()