HTTP proxy server written in Go with the goproxy library, plus an image-cache saving script: first version completed

Source: Internet
Author: User
This is an original article; the information in it may have evolved or changed since it was written. An HTTP proxy server that saves a cache of images, written in Go with the goproxy library: first version completed.

Usage:
1. Install Go 1.5.x
2. Set the environment variable GOSRC on the command line (on Windows, set it through the "My Computer" settings)
3. Install the goproxy library: go get github.com/elazarl/goproxy
4. Run the script: go run go-httproxy-imagedumper.go
5. You can create an EXE version with go build

Package Mainimport ("CRYPTO/SHA1" "Errors" "Flag" "FMT" "io" "Log" "Net" "net/http"//"Net/http/httputil" "OS" "Os/signal" "Path" "RegExp" "Sync"//"Time" "github.com/elazarl/goproxy"//"Github.com/elazarl/goproxy/transport")//----------- ----------------------------------------------------------------Code Copied from Examples/goproxy-httpdumptype FileStream struct {path STRINGF *os. File}func newfilestream (Path string) *filestream {return &filestream{path, Nil}}func (fs *filestream) Write (b []byte ) (nr int, err error) {if fs.f = = Nil {fs.f, err = os. Create (Fs.path) if err! = Nil {return 0, Err}}return Fs.f.write (b)}func (fs *filestream) Close () error {FMT. Println ("Close", fs.path) if fs.f = = Nil {return errors. New ("FileStream is never written into")}return Fs.f.close ()}//---------------------------------------------------- -----------------------Code Copied from examples/goproxy-httpdump//teereadcloser extends Io. Teereader by allowing reader and writer to be//Closed.type teereadcloser Struct {r io. Readerw io. Writecloserc io. Closer}func newteereadcloser (R io. Readcloser, W io. Writecloser) io. Readcloser {return &teereadcloser{io. Teereader (R, W), W, R}}func (t *teereadcloser) Read (b []byte) (int, error) {return T.r.read (b)}//close attempts to close The reader and write. It returns an error if both//failed to Close.func (T *teereadcloser) Close () error {err1: = T.c.close () err2: = T.w.close ( ) if err1! = Nil {return Err1}return err2}//------------------------------------------------------------------------- --Code Copied from examples/goproxy-httpdump//Stoppablelistener serves stoppableconn and tracks their lifetime to notify When it was safe to terminate the Application.type stoppablelistener struct {net. listenersync.waitgroup}//the name that starts with a lowercase letter so that the type is not exported? Type stoppableconn struct {net. Connwg *sync. Waitgroup}func Newstoppablelistener (l net. 
Listener) *stoppablelistener {return &stoppablelistener{l, sync. Waitgroup{}}}func (SL *stoppablelistener) Accept () (NET. Conn, error){c, err: = sl. Listener.accept () if err! = Nil {return c, err}sl. ADD (1) return &stoppableconn{c, &AMP;SL. Waitgroup}, Nil}func (sc *stoppableconn) Close () error {Sc.wg.Done () return SC. Conn.close ()}var Imagesubtypefinder = RegExp. Mustcompile (' image/([a-z\-]+) ')//actual possible value: Image/png;charset=utf-8func Main () {verbose: = flag. Bool ("V", true, "should every proxy request is logged to stdout")//debug is temporarily set to trueaddr: = flag. String ("L", ": 8888", "on which address should the proxy listen") Cache_basedir: = flag. String ("D", "Cache-basedir", "cache directory to dump image response data") flag. Parse () Proxy: = Goproxy. Newproxyhttpserver () proxy. Verbose = *verboseif ERR: = OS. Mkdirall (*cache_basedir, 0755); Err! = Nil {log. Fatal ("Can ' t create cache base dir", err)}//prohibit some host requests that have been blocked by GFW to prevent the browser from having IO blocking: proxy.onrequest (goproxy. Reqhostis ("ajax.googleapis.com")). Dofunc (//This requires the host field specified in the header?) Func (R *http. Request, CTX *goproxy. PROXYCTX) (*http. Request, *http. Response) {return R, Goproxy. Newresponse (R,goproxy. ConteNttypetext, http. Statusrequesttimeout, "Access blocked"); /test URL: Http://shop.oreilly.com/category/mobile/new.do test successful proxy. Onresponse (). Dofunc (func (Resp *http. Response, CTX *goproxy. PROXYCTX) *http. Response {//Filter Condition://1, Status Code ==200//2, Response content-type== "image/*"//Note: Goproxy currently does not seem to handle chunked/gzipped response body, So the data is saved in the original format (this has no effect on the browser client) if CTX. Req.method! = "GET" {return resp;} If Resp==nil {return resp;//goproxy unique architectural design, regardless of}if resp. 
StatusCode = $ {return resp;//let it pass;} Contenttypeheadervalue: = resp. Header.get ("Content-type") if Contenttypeheadervalue = = "" {//todo: Even if no content-type is specified in the response header, It is still possible to judge whether the image data return resp by trying the Decode method;} M: = Imagesubtypefinder.findstringsubmatch (contenttypeheadervalue) if M==nil {return resp;//not image type, not processing}imagesubtype: = M[1]imagefilesuffix: = Imagesubtypeswitch imagefilesuffix{case "X-icon": Imagefilesuffix = "ico" case "JPEG": Imagefilesuffix = "JPG"}//can now store image data to a local file, the storage format is divided into 2 files://(1) < image original URL SHA1 hash > ". Headers"//Where the first line is get <ur L> and then the empty line, behindWith a response to the head; a blank line, followed by the request header data//(2) < image original URL of the SHA1 hash > "." imagefilesuffix//these 2 files appear in pairs, and may be mapped to subdirectories under the cache base directory (general operating system creating too many files under the same directory may affect performance)//This design can guarantee high performance API that returns image data based on the specified URL, but also lacks an original url,//that enumerates all the cached image files TODO: Do you want to use a database like SQLite to save this information? TODO: The cache Save IO operation of the file is best designed so that://(1) The response body data is initially placed in a memory Bufio object, and only when the response data length is >4kb to write to the local file, which avoids the useless small file cache// (2) It is best not to write files directly in a separate goproxy handler, but rather to send to specialized IO goroutine processing to improve the performance of IO scheduling? Fixme: There may be the same request URL, and the remote server differs based on other request headers (such as cookies?). 
) returns different response data, which is not currently considered image_url: = ctx. Req.URL.String () H: = SHA1. New () H.write ([]byte (Image_url)) Image_url_hash: = Fmt. Sprintf ("%x", H.sum (nil)) log. Println ("url=%s sha1=%s", Image_url, Image_url_hash)//Use this hash value as the file name, here is a file: if the corresponding file already exists? Fs_headers: = Newfilestream (path. Join (*cache_basedir, FMT. Sprintf ("%s.headers", Image_url_hash))) Fmt. fprintf (Fs_headers, "GET%s\r\n", Image_url) fmt. fprintf (fs_headers, "\ r \ n"); for k, V: = Range resp. Header {fmt. fprintf (Fs_headers, "%s:%s\r\n", K, v[0])//type Header map[string][]string}fmt. fprintf (fs_headers, "\ r \ n"); for k, V: = rAnge CTX. Req.header {fmt. fprintf (Fs_headers, "%s:%s\r\n", K, v[0])}fs_headers. Close () Fs_body: = Newfilestream (path. Join (*cache_basedir, FMT. Sprintf ("%s.%s", Image_url_hash, Imagefilesuffix)) resp. Body = Newteereadcloser (resp. Body, Fs_body)//The dump of the response body data here Goproxy-httpdump example of Return resp}) listen_addr, err: = Net. Listen ("TCP", *ADDR) if err! = Nil {log. Fatal ("Listen:", err)}SL: = Newstoppablelistener (listen_addr) ch: = Make (chan os. Signal)//Read data from this Chan to automatically get the user's keyboard interrupt signal? Signal. Notify (CH, os. Interrupt) go func () {<-chlog. Println ("Got SIGINT exiting") SL. ADD (1) SL. Close () SL. Done ()} () log. Println ("Starting Go httpproxy-imagedumper ...") http. Serve (SL, proxy) SL. Wait () log. Println ("All Connections Closed-exit")}

Known issue: the program does not exit immediately when Ctrl+C is pressed to end it.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.