The code is as follows:
def _first_child_text(node, tag_name):
    """Return the text of the first ``tag_name`` element found under ``node``.

    Raises IndexError when no such element exists or when it has no child
    nodes (for example an empty element).
    """
    return node.getElementsByTagName(tag_name)[0].childNodes[0].nodeValue


def _print_seed(seed):
    """Print a human-readable dump of one seed record to stdout."""
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('-------------------------------------------')
    print('system_id:%d' % seed['system_id'])
    print('system_name:%s' % seed['system_name'])
    print('section_id:%d' % seed['section_id'])
    print('section_name:%s' % seed['section_name'])
    print('seed_id:%d' % seed['seed_id'])
    print('userblog_url:%s' % seed['userblog_url'])
    print('=========================')
    print(seed)


def get_seed_data(filename, verbose=True, pause=True):
    """Parse a seed XML file into a flat list of seed dictionaries.

    The document is walked as system -> section -> seed.  One dictionary is
    produced per ``seed`` element, carrying the attributes of its enclosing
    system and section along with the seed's own data.

    NOTE(review): the tag names used here ("system", "section",
    "crawl_cycle", "seed", "userblog_url") are assumed to be lowercase.
    XML tag names are case-sensitive, so confirm them against the real
    seed file.

    Args:
        filename: Path of the XML file (anything ``minidom.parse`` accepts).
        verbose: When true (the default), print every seed as it is read.
        pause: When true (the default), run the shell command ``pause``
            after every seed.  That command only exists on Windows; pass
            ``False`` for non-interactive use.

    Returns:
        A list of dicts with the keys ``crawl_cycle``, ``system_id``,
        ``system_name``, ``section_id``, ``section_name``, ``seed_id`` and
        ``userblog_url``.  The three ``*_id`` values are ints; the rest are
        the strings found in the document.

    Raises:
        ValueError: If an ``id`` attribute of a system, section or seed
            that is actually used is not an integer.
        IndexError: If a section that contains seeds has no ``crawl_cycle``
            text, or a seed has no ``userblog_url`` text.
    """
    root = minidom.parse(filename).documentElement
    seed_list = []
    for system_node in root.getElementsByTagName("system"):
        system_id = system_node.getAttribute("id")
        system_name = system_node.getAttribute("name")
        for section_node in system_node.getElementsByTagName("section"):
            section_id = section_node.getAttribute("id")
            section_name = section_node.getAttribute("name")
            crawl_cycle = _first_child_text(section_node, "crawl_cycle")
            for seed_node in section_node.getElementsByTagName("seed"):
                # The ids are converted here, per seed, so a system or
                # section without seeds never has its id validated.
                seed = {
                    'crawl_cycle': crawl_cycle,
                    'system_id': int(system_id),
                    'system_name': system_name,
                    'section_id': int(section_id),
                    'section_name': section_name,
                    'seed_id': int(seed_node.getAttribute("id")),
                    'userblog_url': _first_child_text(seed_node,
                                                      "userblog_url"),
                }
                seed_list.append(seed)
                if verbose:
                    _print_seed(seed)
                if pause:
                    # Interactive "press any key" stop between seeds.
                    os.system('pause')
    return seed_list
The code is as follows:
http://aaa.com.cn/loveissuuny
http://aaa.com.cn/loveissuuny
http://aaa.com.cn/sanxiazaixian
http://aaa.com.cn/twocold
http://aaa.com.cn/u/1233526741