# -*- coding: utf-8 -*-
# python
# Xiaodeng
# Python module HTMLParser: print all URL links found on a page.
import urllib

try:  # Python 3 module layout
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from HTMLParser import HTMLParser
    from urllib import urlopen

# The original post showed two equivalent ways to write MyParser:
#   notation one: "from HTMLParser import HTMLParser", subclass HTMLParser and
#                 call HTMLParser.__init__(self) from an explicit __init__;
#   notation two: "import HTMLParser" and subclass HTMLParser.HTMLParser
#                 without defining __init__.
# The class below keeps the behaviour of notation two (no custom __init__).


class MyParser(HTMLParser):
    """HTML parser that prints the href of every <a> tag starting with 'http'."""

    def handle_starttag(self, tag, attrs):
        """Print absolute links carried by an opening <a> tag.

        This overrides the parser hook that is called for every start tag.

        Args:
            tag: Tag name as reported by the parser.
            attrs: List of (name, value) pairs for the tag's attributes.
        """
        # Only anchor tags are of interest.
        if tag != 'a':
            return
        for name, value in attrs:
            # A bare attribute such as <a href> is reported with value None,
            # so check for a value before testing which string it starts with.
            if name == 'href' and value and value.startswith('http'):
                print(value)


if __name__ == '__main__':
    url = 'http://www.cnblogs.com/'
    response = urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    # On Python 3 the body is bytes and feed() needs text; on Python 2
    # bytes is str, so the content is passed through unchanged.
    if isinstance(content, bytes) and not isinstance(content, str):
        content = content.decode('utf-8', 'replace')
    my = MyParser()
    my.feed(content)
    my.close()

# Sample output recorded in the original post (truncated there with "....."):
#   http://www.cnblogs.com/Jaryleely/p/careertwo.html
#   http://www.cnblogs.com/jaryleely/
#   http://www.cnblogs.com/jaryleely/
#   http://www.cnblogs.com/jaryleely/p/careertwo.html#commentform
#   http://www.cnblogs.com/jaryleely/p/careertwo.html
#   http://www.cnblogs.com/androidjotting/p/4983688.html
#   http://www.cnblogs.com/AndroidJotting/
#   http://www.cnblogs.com/AndroidJotting/
#   http://www.cnblogs.com/AndroidJotting/p/4983688.html#commentform
#   http://www.cnblogs.com/androidjotting/p/4983688.html
#   http://www.cnblogs.com/fuly550871915/p/4983682.html
#   http://www.cnblogs.com/fuly550871915/
#   http://www.cnblogs.com/fuly550871915/
#   http://www.cnblogs.com/fuly550871915/p/4983682.html#commentform
#   http://www.cnblogs.com/fuly550871915/p/4983682.html
#   http://www.cnblogs.com/ray-liang/p/4983592.html
#   http://www.cnblogs.com/ray-liang/
#   http://www.cnblogs.com/ray-liang/
#   http://www.cnblogs.com/ray-liang/p/4983592.html#commentform
#   http://www.cnblogs.com/ray-liang/p/4983592.html
#   .....
# Python module HTMLParser: print all URL links on the page