This article shows, by example, how to run Scrapy in a thread from Python. It is shared here for your reference. The details are as follows:
If you want to call Scrapy from a program you have already written, you can use the following code to run Scrapy in a separate thread.
"" "" "Code to run Scrapy crawler in a thread-works on scrapy 0.8" "" Import threading, Queue from twisted.internet Import Reactor from Scrapy.xlib.pydispatch import dispatcher to Scrapy.core.manager import Scrapymanager from Scrapy.core.eng INE import scrapyengine from Scrapy.core Import signals Class Crawlerthread (threading. Thread): def __init__ (self): threading. Thread.__init__ (self) self.running = False def run (self): self.running = True scrapymanager.configure (contro L_reactor=false) Scrapymanager.start () Reactor.run (Installsignalhandlers=false) def crawl (self, *args): if n OT self.running:raise runtimeerror ("Crawlerthread not Running") self._call_and_block_until_signal (signals.spide R_closed, \ Scrapymanager.crawl, *args) def stop (self): Reactor.callfromthread (scrapyengine.stop) def _call_ And_block_until_signal (self, signal, F, *a, **kw): Q = queue.queue () def unblock (): Q.put (None) Dispatch Er.connect (UnbloCK, signal=signal) Reactor.callfromthread (f, *a, **kw) Q.get () # Usage example Below:import os os.environ.setde Fault (' Scrapy_settings_module ', ' myproject.settings ') from Scrapy.xlib.pydispatch import dispatcher from Scrapy.core Import signals from scrapy.conf Import settings from Scrapy.crawler import crawlerthread settings.overrides[' Log_ ENABLED ' = False # avoid log noise def item_passed (item): print "Just scraped item:", Item Dispatcher.connect (Item_pass Ed, signal=signals.item_passed) crawler = Crawlerthread () print "Starting crawler thread ..." Crawler.start () print "Crawl
ing somedomain.com ... "crawler.crawl (' somedomain.com) # Blocking call print" Crawling anotherdomain.com ... " Crawler.crawl (' anotherdomain.com ') # Blocking call print "Stopping crawler thread ..." Crawler.stop ()
I hope this article will help you with your Python programming.