這是一篇較早建立的文章,其中的資訊可能已經有所發展或是發生改變。本文介紹使用Go語言+goproxy庫編寫的HTTP Proxy伺服器+圖片cache儲存(第一版已完成)。
用法:
1、安裝Go 1.5.x
2、命令列設定環境變數GOPATH(Windows下設定到“我的電腦”裡面)
3、安裝goproxy庫:go get github.com/elazarl/goproxy
4、運行指令碼:go run go-httproxy-imagedumper.go
5、可以用go build建立exe版本
// Command go-httproxy-imagedumper is an HTTP proxy built on goproxy that
// stores every image response passing through it in a local cache directory.
package main

import (
	"crypto/sha1"
	"errors"
	"flag"
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
	// "net/http/httputil"
	"os"
	"os/signal"
	"path"
	"regexp"
	"sync"
	// "time"

	"github.com/elazarl/goproxy"
	// "github.com/elazarl/goproxy/transport"
)

//--------------------------------------------------------------------------- Code Copied from examples/goproxy-httpdump

// FileStream is an io.WriteCloser whose backing file is created lazily, on
// the first call to Write.
type FileStream struct {
	path string
	f    *os.File
}

// NewFileStream returns a FileStream that will write to path. No file is
// created until the first Write.
func NewFileStream(path string) *FileStream {
	return &FileStream{path: path}
}

// Write creates the file on first use and then appends b to it. It returns
// the error from os.Create if the file cannot be created.
func (fs *FileStream) Write(b []byte) (nr int, err error) {
	if fs.f == nil {
		fs.f, err = os.Create(fs.path)
		if err != nil {
			return 0, err
		}
	}
	return fs.f.Write(b)
}

// Close prints the path to stdout and closes the file. It returns an error
// if no file was ever opened, i.e. nothing was successfully written.
func (fs *FileStream) Close() error {
	fmt.Println("Close", fs.path)
	if fs.f == nil {
		return errors.New("FileStream was never written into")
	}
	return fs.f.Close()
}

//--------------------------------------------------------------------------- Code Copied from examples/goproxy-httpdump

// TeeReadCloser extends io.TeeReader by allowing reader and writer to be
// closed.
type TeeReadCloser struct {
	r io.Reader
	w io.WriteCloser
	c io.Closer
}

// NewTeeReadCloser returns a ReadCloser that copies everything read from r
// into w, and closes both r and w when it is closed.
func NewTeeReadCloser(r io.ReadCloser, w io.WriteCloser) io.ReadCloser {
	return &TeeReadCloser{io.TeeReader(r, w), w, r}
}

// Read reads from the underlying tee reader.
func (t *TeeReadCloser) Read(b []byte) (int, error) {
	return t.r.Read(b)
}

// Close closes both the reader and the writer. Both are always closed; the
// reader's error takes precedence over the writer's.
func (t *TeeReadCloser) Close() error {
	err1 := t.c.Close()
	err2 := t.w.Close()
	if err1 != nil {
		return err1
	}
	return err2
}

//--------------------------------------------------------------------------- Code Copied from examples/goproxy-httpdump

// stoppableListener serves stoppableConn and tracks their lifetime to notify
// when it is safe to terminate the application.
type stoppableListener struct {
	net.Listener
	sync.WaitGroup
}

// stoppableConn is a net.Conn that signals the listener's WaitGroup when it
// is closed. The lower-case name keeps the type unexported.
type stoppableConn struct {
	net.Conn
	wg *sync.WaitGroup
}

// newStoppableListener wraps l so that accepted connections are counted.
func newStoppableListener(l net.Listener) *stoppableListener {
	return &stoppableListener{Listener: l}
}

// Accept accepts a connection from the wrapped listener and registers it
// with the WaitGroup.
func (sl *stoppableListener) Accept() (net.Conn, error) {
	c, err := sl.Listener.Accept()
	if err != nil {
		return c, err
	}
	sl.Add(1)
	return &stoppableConn{c, &sl.WaitGroup}, nil
}

// Close marks the connection as finished in the WaitGroup and closes it.
func (sc *stoppableConn) Close() error {
	sc.wg.Done()
	return sc.Conn.Close()
}

// imageSubTypeFinder extracts the image subtype from a Content-Type value.
// Real-world values may carry parameters, e.g. "image/png;charset=UTF-8".
var imageSubTypeFinder = regexp.MustCompile(`image/([a-z\-]+)`)

// imageFileSuffix maps a Content-Type header value to the file suffix used
// for the cached body. ok is false when the value does not name an image
// type (including when it is empty).
//
// TODO: even without a Content-Type header the body could still be sniffed
// (by trying to decode it) to decide whether it is image data.
func imageFileSuffix(contentType string) (suffix string, ok bool) {
	m := imageSubTypeFinder.FindStringSubmatch(contentType)
	if m == nil {
		return "", false
	}
	switch subType := m[1]; subType {
	case "x-icon":
		return "ico", true
	case "jpeg":
		return "jpg", true
	default:
		return subType, true
	}
}

// urlHash returns the lower-case hex SHA-1 digest of rawURL.
func urlHash(rawURL string) string {
	return fmt.Sprintf("%x", sha1.Sum([]byte(rawURL)))
}

// dumpHeaders writes every value of every header in h to w as
// "Name: value\r\n" lines and returns the first write error.
func dumpHeaders(w io.Writer, h http.Header) error {
	for name, values := range h { // type Header map[string][]string
		for _, v := range values {
			if _, err := fmt.Fprintf(w, "%s: %s\r\n", name, v); err != nil {
				return err
			}
		}
	}
	return nil
}

// writeHeadersFile writes the ".headers" companion file: a "GET <url>" line,
// an empty line, the response headers, an empty line, then the request
// headers. It returns the first error encountered.
func writeHeadersFile(name, rawURL string, respHeader, reqHeader http.Header) error {
	fs := NewFileStream(name)
	_, err := fmt.Fprintf(fs, "GET %s\r\n\r\n", rawURL)
	if err == nil {
		err = dumpHeaders(fs, respHeader)
	}
	if err == nil {
		_, err = io.WriteString(fs, "\r\n")
	}
	if err == nil {
		err = dumpHeaders(fs, reqHeader)
	}
	// Always close, but keep the earlier (more specific) error if there is one.
	if cerr := fs.Close(); err == nil {
		err = cerr
	}
	return err
}

func main() {
	// -v temporarily defaults to true while debugging.
	verbose := flag.Bool("v", true, "should every proxy request be logged to stdout")
	addr := flag.String("l", ":8888", "on which address should the proxy listen")
	cacheBaseDir := flag.String("d", "cache-basedir", "cache directory to dump image response data")
	flag.Parse()

	proxy := goproxy.NewProxyHttpServer()
	proxy.Verbose = *verbose
	if err := os.MkdirAll(*cacheBaseDir, 0755); err != nil {
		log.Fatal("Can't create cache base dir", err)
	}

	// Short-circuit requests to hosts that are known to be blocked by the GFW
	// so that the browser does not stall waiting on them.
	// NOTE(review): presumably this relies on the request carrying a Host
	// value — confirm against goproxy.ReqHostIs.
	proxy.OnRequest(goproxy.ReqHostIs("ajax.googleapis.com")).DoFunc(
		func(r *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) {
			return r, goproxy.NewResponse(r,
				goproxy.ContentTypeText, http.StatusRequestTimeout,
				"access blocked")
		})

	// Tested successfully against http://shop.oreilly.com/category/mobile/new.do
	proxy.OnResponse().DoFunc(func(resp *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
		// Only cache responses that satisfy all of:
		//   1. the request method is GET
		//   2. the status code is 200
		//   3. the response Content-Type is image/*
		// Note: goproxy currently does not seem to decode chunked/gzipped
		// response bodies, so the data is stored in its raw form (this does
		// not affect the browser client).
		if ctx.Req.Method != "GET" {
			return resp
		}
		if resp == nil {
			return resp // goproxy design quirk: resp may be nil; nothing to do
		}
		if resp.StatusCode != 200 {
			return resp // let it pass
		}
		suffix, ok := imageFileSuffix(resp.Header.Get("Content-Type"))
		if !ok {
			return resp // not an image type, do not handle it
		}

		// The image is stored as a pair of files:
		//   (1) <SHA-1 of the original image URL>.headers
		//       first line "GET <url>", an empty line, the response headers,
		//       another empty line, then the request headers
		//   (2) <SHA-1 of the original image URL>.<suffix>
		// The two files always appear together and could later be mapped into
		// sub-directories of the cache base dir (too many files in a single
		// directory may hurt performance on common operating systems).
		// This layout gives a fast "image data for a given URL" lookup, but
		// there is no way yet to enumerate the original URLs of all cached
		// images.
		// TODO: store that index in a database such as sqlite?
		// TODO: better cache I/O design:
		//   (1) keep the body in an in-memory buffer first and only start
		//       writing to disk once it exceeds 4KB, to avoid caching useless
		//       tiny files;
		//   (2) rather than writing files directly inside the goproxy
		//       handler, send the data to a dedicated I/O goroutine?
		// FIXME: the remote server may return different data for the same URL
		// depending on other request headers (cookies?); not handled yet.
		imageURL := ctx.Req.URL.String()
		hash := urlHash(imageURL)
		log.Printf("Url=%s SHA1=%s", imageURL, hash)

		// The hash is used as the file name. FileStream opens files with
		// os.Create, so an entry that already exists is overwritten.
		headersPath := path.Join(*cacheBaseDir, fmt.Sprintf("%s.headers", hash))
		if err := writeHeadersFile(headersPath, imageURL, resp.Header, ctx.Req.Header); err != nil {
			// Best effort: a cache failure is reported but the response
			// is still handed on to the client.
			log.Printf("Can't write %s: %v", headersPath, err)
		}

		// Dump the body as it streams to the client, in the same way as the
		// goproxy-httpdump example does.
		bodyFile := NewFileStream(path.Join(*cacheBaseDir, fmt.Sprintf("%s.%s", hash, suffix)))
		resp.Body = NewTeeReadCloser(resp.Body, bodyFile)
		return resp
	})

	listener, err := net.Listen("tcp", *addr)
	if err != nil {
		log.Fatal("listen:", err)
	}
	sl := newStoppableListener(listener)

	// signal.Notify does not block when sending, so the channel must be
	// buffered or an interrupt that arrives early could be dropped.
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, os.Interrupt)
	go func() {
		<-ch
		log.Println("Got SIGINT exiting")
		sl.Add(1)
		sl.Close()
		sl.Done()
	}()

	log.Println("Starting Go HttpProxy-ImageDumper...")
	if err := http.Serve(sl, proxy); err != nil {
		// Serve returns once the listener has been closed by the goroutine
		// above (or on an accept failure); report why it stopped.
		log.Println("Serve stopped:", err)
	}
	// Wait blocks until every accepted connection has been closed, so the
	// process may linger after Ctrl+C while clients keep connections open.
	sl.Wait()
	log.Println("All connections closed - exit")
}
已知問題:Ctrl+C試圖結束程式運行時,程式不能立即退出。