This is an archived article; the information in it may have evolved or changed since it was written. Writing an HTTP proxy server with image caching in Go, using the goproxy library: first version completed.
Usage:
1. Install Go 1.5.x.
2. Set the environment variable GOSRC from the command line (on Windows, set it through the "My Computer" settings). Note: `go get` in Go 1.5 relies on GOPATH, so GOPATH is probably the variable meant here.
3. Install the goproxy library: go get github.com/elazarl/goproxy
4. Run the script: go run go-httproxy-imagedumper.go
5. You can build an EXE version with go build.
Package Mainimport ("CRYPTO/SHA1" "Errors" "Flag" "FMT" "io" "Log" "Net" "net/http"//"Net/http/httputil" "OS" "Os/signal" "Path" "RegExp" "Sync"//"Time" "github.com/elazarl/goproxy"//"Github.com/elazarl/goproxy/transport")//----------- ----------------------------------------------------------------Code Copied from Examples/goproxy-httpdumptype FileStream struct {path STRINGF *os. File}func newfilestream (Path string) *filestream {return &filestream{path, Nil}}func (fs *filestream) Write (b []byte ) (nr int, err error) {if fs.f = = Nil {fs.f, err = os. Create (Fs.path) if err! = Nil {return 0, Err}}return Fs.f.write (b)}func (fs *filestream) Close () error {FMT. Println ("Close", fs.path) if fs.f = = Nil {return errors. New ("FileStream is never written into")}return Fs.f.close ()}//---------------------------------------------------- -----------------------Code Copied from examples/goproxy-httpdump//teereadcloser extends Io. Teereader by allowing reader and writer to be//Closed.type teereadcloser Struct {r io. Readerw io. Writecloserc io. Closer}func newteereadcloser (R io. Readcloser, W io. Writecloser) io. Readcloser {return &teereadcloser{io. Teereader (R, W), W, R}}func (t *teereadcloser) Read (b []byte) (int, error) {return T.r.read (b)}//close attempts to close The reader and write. It returns an error if both//failed to Close.func (T *teereadcloser) Close () error {err1: = T.c.close () err2: = T.w.close ( ) if err1! = Nil {return Err1}return err2}//------------------------------------------------------------------------- --Code Copied from examples/goproxy-httpdump//Stoppablelistener serves stoppableconn and tracks their lifetime to notify When it was safe to terminate the Application.type stoppablelistener struct {net. listenersync.waitgroup}//the name that starts with a lowercase letter so that the type is not exported? Type stoppableconn struct {net. Connwg *sync. Waitgroup}func Newstoppablelistener (l net. 
Listener) *stoppablelistener {return &stoppablelistener{l, sync. Waitgroup{}}}func (SL *stoppablelistener) Accept () (NET. Conn, error){c, err: = sl. Listener.accept () if err! = Nil {return c, err}sl. ADD (1) return &stoppableconn{c, &SL. Waitgroup}, Nil}func (sc *stoppableconn) Close () error {Sc.wg.Done () return SC. Conn.close ()}var Imagesubtypefinder = RegExp. Mustcompile (' image/([a-z\-]+) ')//actual possible value: Image/png;charset=utf-8func Main () {verbose: = flag. Bool ("V", true, "should every proxy request is logged to stdout")//debug is temporarily set to trueaddr: = flag. String ("L", ": 8888", "on which address should the proxy listen") Cache_basedir: = flag. String ("D", "Cache-basedir", "cache directory to dump image response data") flag. Parse () Proxy: = Goproxy. Newproxyhttpserver () proxy. Verbose = *verboseif ERR: = OS. Mkdirall (*cache_basedir, 0755); Err! = Nil {log. Fatal ("Can ' t create cache base dir", err)}//prohibit some host requests that have been blocked by GFW to prevent the browser from having IO blocking: proxy.onrequest (goproxy. Reqhostis ("ajax.googleapis.com")). Dofunc (//This requires the host field specified in the header?) Func (R *http. Request, CTX *goproxy. PROXYCTX) (*http. Request, *http. Response) {return R, Goproxy. Newresponse (R,goproxy. ConteNttypetext, http. Statusrequesttimeout, "Access blocked"); /test URL: Http://shop.oreilly.com/category/mobile/new.do test successful proxy. Onresponse (). Dofunc (func (Resp *http. Response, CTX *goproxy. PROXYCTX) *http. Response {//Filter Condition://1, Status Code ==200//2, Response content-type== "image/*"//Note: Goproxy currently does not seem to handle chunked/gzipped response body, So the data is saved in the original format (this has no effect on the browser client) if CTX. Req.method! = "GET" {return resp;} If Resp==nil {return resp;//goproxy unique architectural design, regardless of}if resp. 
StatusCode = $ {return resp;//let it pass;} Contenttypeheadervalue: = resp. Header.get ("Content-type") if Contenttypeheadervalue = = "" {//todo: Even if no content-type is specified in the response header, It is still possible to judge whether the image data return resp by trying the Decode method;} M: = Imagesubtypefinder.findstringsubmatch (contenttypeheadervalue) if M==nil {return resp;//not image type, not processing}imagesubtype: = M[1]imagefilesuffix: = Imagesubtypeswitch imagefilesuffix{case "X-icon": Imagefilesuffix = "ico" case "JPEG": Imagefilesuffix = "JPG"}//can now store image data to a local file, the storage format is divided into 2 files://(1) < image original URL SHA1 hash > ". Headers"//Where the first line is get <ur L> and then the empty line, behindWith a response to the head; a blank line, followed by the request header data//(2) < image original URL of the SHA1 hash > "." imagefilesuffix//these 2 files appear in pairs, and may be mapped to subdirectories under the cache base directory (general operating system creating too many files under the same directory may affect performance)//This design can guarantee high performance API that returns image data based on the specified URL, but also lacks an original url,//that enumerates all the cached image files TODO: Do you want to use a database like SQLite to save this information? TODO: The cache Save IO operation of the file is best designed so that://(1) The response body data is initially placed in a memory Bufio object, and only when the response data length is >4kb to write to the local file, which avoids the useless small file cache// (2) It is best not to write files directly in a separate goproxy handler, but rather to send to specialized IO goroutine processing to improve the performance of IO scheduling? Fixme: There may be the same request URL, and the remote server differs based on other request headers (such as cookies?). 
) returns different response data, which is not currently considered image_url: = ctx. Req.URL.String () H: = SHA1. New () H.write ([]byte (Image_url)) Image_url_hash: = Fmt. Sprintf ("%x", H.sum (nil)) log. Println ("url=%s sha1=%s", Image_url, Image_url_hash)//Use this hash value as the file name, here is a file: if the corresponding file already exists? Fs_headers: = Newfilestream (path. Join (*cache_basedir, FMT. Sprintf ("%s.headers", Image_url_hash))) Fmt. fprintf (Fs_headers, "GET%s\r\n", Image_url) fmt. fprintf (fs_headers, "\ r \ n"); for k, V: = Range resp. Header {fmt. fprintf (Fs_headers, "%s:%s\r\n", K, v[0])//type Header map[string][]string}fmt. fprintf (fs_headers, "\ r \ n"); for k, V: = rAnge CTX. Req.header {fmt. fprintf (Fs_headers, "%s:%s\r\n", K, v[0])}fs_headers. Close () Fs_body: = Newfilestream (path. Join (*cache_basedir, FMT. Sprintf ("%s.%s", Image_url_hash, Imagefilesuffix)) resp. Body = Newteereadcloser (resp. Body, Fs_body)//The dump of the response body data here Goproxy-httpdump example of Return resp}) listen_addr, err: = Net. Listen ("TCP", *ADDR) if err! = Nil {log. Fatal ("Listen:", err)}SL: = Newstoppablelistener (listen_addr) ch: = Make (chan os. Signal)//Read data from this Chan to automatically get the user's keyboard interrupt signal? Signal. Notify (CH, os. Interrupt) go func () {<-chlog. Println ("Got SIGINT exiting") SL. ADD (1) SL. Close () SL. Done ()} () log. Println ("Starting Go httpproxy-imagedumper ...") http. Serve (SL, proxy) SL. Wait () log. Println ("All Connections Closed-exit")}
Known issue: the program does not exit immediately when Ctrl+C is pressed to stop it.