Today's problem: a text document contains Unicode escape sequences, and we want to output them as the Chinese characters they represent.
The document content is \u5bb6\u91cc\uff0c\u529e\u516c\u5ba4\uff0c\u5c55\u67dc\uff0c and we need to convert it to Chinese (that is, 家里，办公室，展柜，).
# -*- coding: utf-8 -*-
# ------- Obtain webpage content using Python -------------#
# Download a page and save it to a text file, then read one line of JSON
# text from a file and print it twice: once with \uXXXX escapes and once
# with the escapes rendered as the characters they stand for.
# Single-argument print() calls keep the syntax valid on Python 2 and 3.
import io
import json
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    from urllib import urlopen  # Python 2 location

url = "http://www.baidu.com"  # web address

# NOTE(review): the page is saved to one path but the JSON is read from a
# different one; confirm whether these were meant to be the same file.
download_path = "/MR _/tao_app/list_content/list_content.txt"
json_path = "/home/AAA/workspace/mr_tao/tao_app/list_content/list_content.txt"

wp = urlopen(url)  # open the connection
try:
    content = wp.read()  # get the page content
finally:
    wp.close()  # release the connection even if read() fails

# Binary mode: the payload is written exactly as it was received.
with open(download_path, "wb") as fp:
    fp.write(content)

# Assumes the file is UTF-8 text (JSON made only of \uXXXX escapes is plain
# ASCII, which is a subset) -- TODO confirm for other inputs.
with io.open(json_path, encoding="utf-8") as fr:
    content = fr.readline()  # only the first line is parsed

js = json.loads(content)
print(json.dumps(js))  # default ensure_ascii=True keeps the \uXXXX escapes
print(json.dumps(js, ensure_ascii=False))  # emits the real characters
2. Output specified content from the text (for each line, print the comma-separated parts that contain the string "title"):
# -*- coding: utf-8 -*-
# Print every comma-separated piece of the input file that contains the
# keyword. Single-argument print() is valid on both Python 2 and 3.
import io

keyword = 'title'

# Assumes the file is UTF-8 text -- TODO confirm against the real data.
# The with-block closes the file even if an error occurs while reading.
with io.open("/tao_app/list_content/list_content1.txt", encoding="utf-8") as fc:
    for content in fc:
        # Each row is split on commas and every piece is checked on its own.
        for piece in content.split(','):
            # Only pieces that contain the keyword are printed.
            if keyword in piece:
                print(piece)
//////////////////////////////////////// //////////////////////////////////////// /////////////////////////////////////
# -*- coding: utf-8 -*-
'''
Created on Dec 18, 2012
@author: AAA

Append every comma-separated piece containing the keyword to a title file,
then print up to the first 20 lines of that file and decode its first line
as JSON so that \\uXXXX escapes are shown as real characters.
'''
import io
import json

keyword = 'title'
source_path = "/mr_ta/ta_app/list_content/list_content1.txt"
title_path = "/mr_ta/ta_app/list_content/content_title.txt"

# Assumes both files are UTF-8 text -- TODO confirm against the real data.
# The output file is opened once, in append mode, instead of once per match,
# and both handles are closed when the with-blocks exit.
with io.open(source_path, encoding="utf-8") as fc:
    with io.open(title_path, "a", encoding="utf-8") as w_title:
        for content in fc:
            for piece in content.split(','):
                if keyword in piece:
                    # NOTE(review): pieces are written without a separator,
                    # so a line break appears only where a piece already
                    # ends with one -- confirm this is intended.
                    w_title.write(piece)

with io.open(title_path, encoding="utf-8") as r_title:
    title_lines = r_title.readlines()

# Slicing prints at most 20 lines and cannot fail on a shorter file.
for title in title_lines[:20]:
    print(title)

# Decode only when there is a first line to decode.
if title_lines:
    js = json.loads(title_lines[0])
    print(json.dumps(js, ensure_ascii=False))