Python: Multithreading and Multiprocessing

Multithreading:

import threading
from multiprocessing import Queue
from time import sleep
from bs4 import BeautifulSoup
from requests import get
import re

class MyThread(threading.Thread):
    def __init__(self, qlock, queue):
        threading.Thread.__init__(self)
        self.qlock = qlock
        self.queue = queue

    def run(self):
        process(self.qlock, self.queue)

def process(qlock, queue):
    qlock.acquire()            # mutex
    try:
        data = queue.get()     # read from the queue
        print(data)
    finally:
        qlock.release()        # release the lock
    sleep(1)

# Build the queue
workQueue = Queue()
qlock = threading.Lock()

url = 'https://www.pixiv.net/ranking.php?mode=daily'
r = get(url, timeout=1)
html = r.text
soup = BeautifulSoup(html, 'lxml')
urls = soup.find_all('img')
links = []
for u in urls:
    pattern = re.compile(r'data-src="(.+?)"')
    link = pattern.findall(str(u))
    workQueue.put(link)        # write to the queue
    links.append(link)

threads = []
for u in links:
    thread = MyThread(qlock, workQueue)
    thread.daemon = True
    thread.start()
    threads.append(thread)

# Wait until the queue is drained
while not workQueue.empty():
    pass

# Wait for the threads to finish
for t in threads:
    t.join()
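As an aside: the "while not workQueue.empty(): pass" loop at the end is a busy-wait that burns a CPU core. For threads, the standard library's queue.Queue has task_done()/join() built for exactly this synchronization. A minimal sketch, with an invented item list and an arbitrary worker count of 4:

import queue
import threading

def worker(q):
    while True:
        item = q.get()       # blocks until an item is available
        print(item)
        q.task_done()        # mark this item as processed

q = queue.Queue()
for i in range(10):          # made-up work items for illustration
    q.put(i)

for _ in range(4):           # arbitrary worker count
    threading.Thread(target=worker, args=(q,), daemon=True).start()

q.join()                     # blocks until every item has been task_done()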

Multiprocessing:

1. Create a process pool with the Pool class from the multiprocessing module:

from multiprocessing import Pool
from bs4 import BeautifulSoup
from requests import get
import re
import os

def run_process(url):
    print(url)

if __name__ == '__main__':
    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')
    urls = soup.find_all('img')
    links = []
    for u in urls:
        pattern = re.compile(r'data-src="(.+?\.jpg)"')
        link = pattern.findall(str(u))
        links.append(link)
    pool = Pool(os.cpu_count())                   # one worker per CPU core
    for u in links:
        pool.apply_async(run_process, args=(u,))  # note: args must be a tuple
    pool.close()
    pool.join()
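Since the whole job is just mapping one function over a list, Pool.map expresses the same fan-out more compactly and blocks until everything is done. A minimal sketch of the idea, with an invented placeholder link list:

from multiprocessing import Pool
import os

def run_process(url):
    print(url)

if __name__ == '__main__':
    # Placeholder list for illustration; in the real script this is built by the scraper
    links = ['https://example.com/a.jpg', 'https://example.com/b.jpg']
    with Pool(os.cpu_count()) as pool:
        pool.map(run_process, links)   # blocks until all tasks finish; no close()/join() needed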

2. Use the Process class plus a Queue for inter-process communication (though here the queue is filled by the parent process, not by the workers; see the producer/consumer sketch after the code):

from multiprocessing import Process, Queue
from bs4 import BeautifulSoup
from requests import get
import re

class MyProcess(Process):
    def __init__(self, queue):
        Process.__init__(self)
        self.queue = queue

    def run(self):
        run_process(self.queue)

def run_process(queue):
    data = queue.get()     # read from the queue
    print(data)

if __name__ == '__main__':
    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')
    urls = soup.find_all('img')
    queue = Queue()
    links = []
    for u in urls:
        pattern = re.compile(r'data-src="(.+?\.jpg)"')
        link = pattern.findall(str(u))
        queue.put(link)    # write to the queue
        links.append(link)
    processes = []
    for u in links:
        p = MyProcess(queue)
        p.start()
        processes.append(p)
    # Wait until the queue is drained
    while not queue.empty():
        pass
    for p in processes:
        p.join()
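If the queue writing itself should also be multi-process, a producer process can fill the queue while a consumer drains it. A minimal sketch with made-up data and a None sentinel to signal the end:

from multiprocessing import Process, Queue

def producer(q, items):
    for item in items:
        q.put(item)          # written from a child process
    q.put(None)              # sentinel: no more data

def consumer(q):
    while True:
        item = q.get()
        if item is None:     # stop at the sentinel
            break
        print(item)

if __name__ == '__main__':
    q = Queue()
    p = Process(target=producer, args=(q, list(range(10))))  # made-up data
    c = Process(target=consumer, args=(q,))
    p.start(); c.start()
    p.join(); c.join()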

The second version is significantly slower than the first, and I'm not sure why... My best guess is process start-up cost: the Pool version creates a fixed set of workers once and reuses them, while the second version spawns a brand-new process for every single link.
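A quick way to test that guess is to time a reused pool against one fresh process per task on a trivial function. A minimal sketch; the work function and the task count of 100 are invented for the benchmark:

import time
from multiprocessing import Pool, Process

def work(x):
    return x * x   # trivial task, so process overhead dominates the timing

if __name__ == '__main__':
    tasks = list(range(100))

    start = time.time()
    with Pool(4) as pool:              # workers are created once and reused
        pool.map(work, tasks)
    print('Pool:    {:.2f}s'.format(time.time() - start))

    start = time.time()
    procs = [Process(target=work, args=(t,)) for t in tasks]
    for p in procs:                    # one brand-new process per task
        p.start()
    for p in procs:
        p.join()
    print('Process: {:.2f}s'.format(time.time() - start))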

But the above is CPU-side work. Let's test an I/O-bound toy crawler and see the effect:

1. Multithreading:

import threading
from multiprocessing import Queue
from time import sleep
from bs4 import BeautifulSoup
from requests import get
import re

class MyThread(threading.Thread):
    def __init__(self, qlock, queue):
        threading.Thread.__init__(self)
        self.qlock = qlock
        self.queue = queue

    def run(self):
        process(self.qlock, self.queue)

def process(qlock, queue):
    qlock.acquire()                # mutex
    try:
        url = queue.get()[0]       # take one link from the queue
        img = get(url, timeout=1).content
        name = url.split('/')[-1]
        imgid = name[:8]
        with open('c:/users/adimin/desktop/video/{}.jpg'.format(imgid), 'wb') as fp:
            fp.write(img)
        print('Download: ' + url)
    finally:
        qlock.release()            # release the lock
    # sleep(1)

# Build the queue
workQueue = Queue()
qlock = threading.Lock()
url = 'https://www.pixiv.net/ranking.php?mode=daily'
html = get(url, timeout=1).text
soup = BeautifulSoup(html, 'lxml')
urls = soup.find_all('img')
links = []
for u in urls:
    pattern = re.compile(r'data-src="(.+?\.jpg)"')
    link = pattern.findall(str(u))
    workQueue.put(link)            # write to the queue
    links.append(link)

threads = []
for u in links:
    thread = MyThread(qlock, workQueue)
    thread.start()
    threads.append(thread)

# Wait until the queue is drained
while not workQueue.empty():
    pass

# Wait for the threads to finish
for t in threads:
    t.join()

2. Multiprocessing:

from multiprocessing import Process, Queue
from bs4 import BeautifulSoup
from requests import get
import re

class MyProcess(Process):
    def __init__(self, queue):
        Process.__init__(self)
        self.queue = queue

    def run(self):
        run_process(self.queue)

def run_process(queue):
    url = queue.get()[0]           # take one link from the queue
    img = get(url, timeout=1).content
    name = url.split('/')[-1]
    imgid = name[:8]
    with open('c:/users/adimin/desktop/video/{}.jpg'.format(imgid), 'wb') as fp:
        fp.write(img)
    print('Download: ' + url)

if __name__ == '__main__':
    url = 'https://www.pixiv.net/ranking.php?mode=daily'
    html = get(url, timeout=1).text
    soup = BeautifulSoup(html, 'lxml')
    urls = soup.find_all('img')
    queue = Queue()
    links = []
    for u in urls:
        pattern = re.compile(r'data-src="(.+?\.jpg)"')
        link = pattern.findall(str(u))
        queue.put(link)            # write to the queue
        links.append(link)
    processes = []
    for u in links:
        p = MyProcess(queue)
        p.start()
        processes.append(p)
    # Wait until the queue is drained
    while not queue.empty():
        pass
    for p in processes:
        p.join()

In the end the running times feel about the same; I can't see a clear gap. For I/O-bound work that is expected: a thread releases the GIL while it waits on the network, so threads and processes end up equally limited by I/O.
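For comparison, here is a minimal sketch of the same I/O-bound download using the standard library's concurrent.futures thread pool, which removes the hand-rolled thread class, the lock, and the busy-wait. The save path is the same one assumed in the scripts above, and max_workers=8 is arbitrary:

from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup
from requests import get
import re

def download(url):
    img = get(url, timeout=1).content
    imgid = url.split('/')[-1][:8]
    with open('c:/users/adimin/desktop/video/{}.jpg'.format(imgid), 'wb') as fp:
        fp.write(img)
    print('Download: ' + url)

if __name__ == '__main__':
    html = get('https://www.pixiv.net/ranking.php?mode=daily', timeout=1).text
    soup = BeautifulSoup(html, 'lxml')
    pattern = re.compile(r'data-src="(.+?\.jpg)"')
    links = [m for img in soup.find_all('img') for m in pattern.findall(str(img))]
    with ThreadPoolExecutor(max_workers=8) as pool:   # pool waits for all tasks on exit
        pool.map(download, links)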

