This example shows how to run a Scrapy crawl from a standalone Python script, and is shared for your reference. The details are as follows:
The code is as follows:
#!/usr/bin/python
import os

# Must be set before any other Scrapy import, so Scrapy picks up the
# project settings module when it is first loaded.
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'project.settings')

from scrapy import log, signals, project
from scrapy.xlib.pydispatch import dispatcher
from scrapy.conf import settings
from scrapy.crawler import CrawlerProcess
from multiprocessing import Process, Queue
class CrawlerScript(object):
    """Run Scrapy spiders from a plain Python script.

    Each crawl is executed in a separate child process because Twisted's
    reactor (which Scrapy's crawler runs on) cannot be restarted within a
    single process; a fresh process gives every crawl a fresh reactor.
    Scraped items are collected via the ``item_passed`` signal and shipped
    back to the parent through a multiprocessing Queue.
    """

    def __init__(self):
        # Old (pre-1.0) Scrapy API: build a crawler from the project settings
        # and install it as the global project crawler if none exists yet.
        self.crawler = CrawlerProcess(settings)
        if not hasattr(project, 'crawler'):
            self.crawler.install()
        self.crawler.configure()
        self.items = []
        # Collect every item the spiders emit.
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        # Signal handler: accumulate each scraped item.
        self.items.append(item)

    def _crawl(self, queue, spider_name):
        """Child-process target: run one spider and put its items on *queue*."""
        spider = self.crawler.spiders.create(spider_name)
        if spider:
            self.crawler.queue.append_spider(spider)
        self.crawler.start()
        self.crawler.stop()
        queue.put(self.items)

    def crawl(self, spider):
        """Run the spider named *spider* in a subprocess.

        Blocks until the crawl finishes and returns the list of scraped
        items produced by that spider.
        """
        queue = Queue()
        p = Process(target=self._crawl, args=(queue, spider))
        p.start()
        p.join()
        return queue.get(True)
# Usage: this example runs spider1 once and then spider2 three times,
# collecting each crawl's items into one list.
if __name__ == "__main__":
    log.start()
    items = list()
    crawler = CrawlerScript()
    items.append(crawler.crawl('spider1'))
    for i in range(3):
        items.append(crawler.crawl('spider2'))
    # print() form is valid under both Python 2 and Python 3.
    print(items)
Hopefully this article will help you with Python programming.