# coding: utf-8
# Python: sitecopy, a script for cloning ("shanzhai") a web site's UI
import cookielib
import os
import re
import shutil
import socket
import sys
import time
import urllib2
from gzip import GzipFile
from Queue import Queue
from StringIO import StringIO
from threading import Lock, Thread, stack_size
class ContentEncodingProcessor(urllib2.BaseHandler):
    """A handler to add gzip capabilities to urllib2 requests."""

    # decode
    def http_response(self, req, resp):
        """Return ``resp`` with a gzip- or deflate-encoded body decoded.

        The ``content-encoding`` response header selects the decoder.  For
        ``gzip`` and ``deflate`` the body is read in full and wrapped in a
        new ``urllib2.addinfourl`` that carries the original headers, URL,
        status code and ``msg``.  Any other response is returned unchanged.

        ``req`` is accepted because the handler protocol passes it; it is
        not used here.
        """
        old_resp = resp
        # Content-coding names are compared case-insensitively; a missing
        # header is treated as "no encoding".
        encoding = (old_resp.headers.get("content-encoding") or "").lower()

        # gzip
        if encoding == "gzip":
            gz = GzipFile(
                fileobj=StringIO(old_resp.read()),
                mode="r",
            )
            resp = urllib2.addinfourl(
                gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        elif encoding == "deflate":
            gz = StringIO(deflate(old_resp.read()))
            # addinfourl is the wrapper class that adds info()/geturl()
            # to a file-like object.
            resp = urllib2.addinfourl(
                gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp
# deflate support
import zlib


def deflate(data):
    """Decompress an HTTP ``deflate`` body and return the decoded bytes.

    zlib's default decompressor expects the zlib container format, not a
    bare deflate stream, so a raw-deflate decode (negative window bits) is
    tried first.  If that raises ``zlib.error`` the data is decoded again
    as a zlib-wrapped stream.

    Raises:
        zlib.error: if ``data`` is valid in neither format.
    """
    try:
        return zlib.decompress(data, -zlib.MAX_WBITS)
    except zlib.error:
        return zlib.decompress(data)
class Fetcher:
    '''
    HTML fetcher

    Basic usage
    -----------
    from fetcher import Fetcher
    f = Fetcher()
    f.get(url)

    Post
    ----
    req = urllib2.Request(...)
    f.post(req)
    '''

    # NOTE(review): the methods below read self.threads, self.q_req,
    # self.q_ans, self.lock, self.running and self.opener, and call
    # self.get(); none of these is defined in this part of the file.

    def push(self, req, repeat=3):
        """Queue ``req`` for the worker threads.

        When ``self.threads`` is falsy the request is fetched synchronously
        through ``self.get(req, repeat)`` and that result is returned.
        Otherwise ``req`` is put on ``self.q_req`` and ``None`` is returned.
        """
        if not self.threads:
            print('no thread, return get instead')
            # Must go through self: there is no module-level ``get``.
            return self.get(req, repeat)
        self.q_req.put(req)

    def pop(self):
        """Return the next item from ``self.q_ans``.

        Blocks for up to 10 seconds.  If the queue read fails (for example
        because it timed out) ``['', '']`` is returned instead.
        """
        try:
            data = self.q_ans.get(block=True, timeout=10)
            self.q_ans.task_done()
        except Exception:
            # Best effort: any failure yields an empty pair.  Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            data = ['', '']
        return data

    def threadget(self):
        """Worker loop: fetch requests from ``self.q_req`` forever.

        Each request is fetched with ``self.get`` and the pair
        ``(req, ans)`` is put on ``self.q_ans``.  ``self.running`` is
        incremented while a request is in flight and decremented
        afterwards, both under ``self.lock``.  This method never returns.
        """
        while True:
            req = self.q_req.get()
            with self.lock:
                self.running += 1
            ans = self.get(req)
            print('got %s' % (req,))
            self.q_ans.put((req, ans))
            try:
                self.q_req.task_done()
            except ValueError:
                # task_done() was called more often than items were queued.
                pass
            with self.lock:
                self.running -= 1
            time.sleep(0.1)  # don't spam

    def proxyisworking(self):
        """Return True if ``self.opener`` can read from a test URL.

        Opens ``http://www.hsbc.com`` and reads up to 1024 bytes.  Any
        exception is printed and reported as ``False``.
        """
        try:
            self.opener.open('http://www.hsbc.com').read(1024)
            return True
        except Exception as what:
            print(what)
            return False
def get (self, req, repeat = 3):
'''
HTTP GET req and repeat 3 times if failed
HTML text is returned when succeeded
' is returned when failed
'''
Try:
Response = self. Opener. Open (req)
Data = Response. Read ()
Except Exception, what:
Print What, req
if repeat > 0
The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion;
the products and services mentioned on this page have no relationship with Alibaba Cloud. If you
find the content of this page confusing, please write us an email, and we will handle the problem
within 5 days of receiving your email.
If you find any instances of plagiarism from the community, please send an email to
info-contact@alibabacloud.com
and provide relevant evidence. A staff member will contact you within 5 working days.