# Coding:utf8
?
from bs4 import beautifulsoup
Import Re
?
Html_doc = "" "
<body>
<p class= "title" ><b>the dormouse ' s story</b></p>
?
<p class= "Story" >once upon a time there were three Little sisters; and their names were
<a href= "Http://example.com/elsie" class= "sister" id= "Link1" >ELSIE</A>
<a href= "Http://example.com/lacie" class= "sister" id= "Link2" >Lacie</a> and
<a href= "Http://example.com/tillie" class= "sister" id= "Link3" >Tillie</a>;
And they lived at the bottom of a well.</p>
?
<p class= "Story" >...</p>
"""
Soup = beautifulsoup(html_doc,' Html.parser ', from_encoding=' utf-8 ' )
?
Print ' links '
Links = soup. find_all(' a ' )
for link in links :
#print Link
Print link. name, link[' href '],link. get_text( )
?
print ' Get a separate link "
link_code = Soup . find ( ' a ' , href = ' Http://example.com/lacie ' )
Print link_code. name, link_code[' href '],link_code. get_text( )
?
print ' Regular expression matches "
link_code = Soup . find ( ' a ' , Href = re . compile ( r "ill" Span style= "Color:navy" > )
#< ctrl+1> According to re direct import modules not imported
Print link_code. name, link_code[' href '],link_code. get_text( )
?
print ' Get p Paragraph text '
p_class = Soup . find ( ' P ' , Class_ = ' title ' )
Print p_class. name,p_class. get_text()

# NOTE(review): the two lines below appear to be leftover page text, not code.
# Yesterday
# A small piece of Python code with HTML format-word released 2