Reprint: http://www.xuebuyuan.com/583071.html
Python: Processing HTML table tags
January 06, 2012 ⁄ general ⁄ 5279 characters in total ⁄ font size: small medium big ⁄ comments off
# Python 2 script (uses urllib2 and BeautifulSoup 3).
# Reads the HTML file named by the first command-line argument and re-emits
# the FIRST <table> it contains as a bare HTML table on stdout, one tag or
# cell text per line.
# Usage: python <this_script> <path-to-html-file>
import sys
import csv
import urllib2
import BeautifulSoup

# Alternative input source: fetch the page over HTTP instead of reading a file.
#page = urllib2.urlopen(sys.argv[1]).read()

# Close the input file deterministically instead of leaking the handle.
with open(sys.argv[1]) as html_file:
    soup = BeautifulSoup.BeautifulSoup(html_file.read())

# Only needed by the commented-out CSV variant further down.
csvout = csv.writer(sys.stdout)

for table in soup.findAll('table'):
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement, so the output is unchanged.
    print("<table border='1'>")
    #print '#'
    #print '# table'
    #print '# fields: ' + ','.join([tr.text for tr in table.findAll('th')])
    for row in table.findAll('tr'):
        print("<tr>")
        # CSV variant: write each row's cell texts as one CSV record.
        #csvout.writerow([tr.text for tr in row.findAll('td')])
        for cell in row.findAll('td'):
            print("<td>")
            # Cell text is unicode; encode explicitly so non-ASCII content
            # does not raise UnicodeEncodeError when stdout is a pipe.
            print(cell.text.encode("utf-8"))
            print("</td>")
        print("</tr>")
    print("</table>")
    # Stop after the first table in the document.
    break
#!/bin/bash
# process.sh -- print the directory that contains this script.
# NOTE(review): the extracted listing lost the digit after "$" and part of the
# script name; "$0" and "process.sh" are assumed to be the originals -- confirm
# against the source article before relying on this.
basedir=$(dirname "$0")
# Quoted so that a path containing spaces is printed intact.
echo "$basedir"
#echo \
#!/bn/bash#process2.shbasedir=$ (DirName $) name=$ (grep-o "<td>Name</td><td>.*</td>" $ | Cut-d \>-F 4 | Cut-d \<-F 1) if test "x$name" = "x"; Thenexit fiprice=$ (grep-o "<td>Price</td><td>.*</td>" $ | cut-d \>-F 4 | cut-d \<-F 1 ) if test "x$price" = "x"; Thenexit;fiif test "X$class" = "x" thenclass=$ (grep-o "<td> product type </td><td>.*</td>" | cut-d \> ; -F 4 | Cut-d \<-F 1) fiif test "X$class" = "x" thenclass=$ (grep-o "<td> device type </td><td>.*</td>" $ | c Ut-d \>-F 4 | Cut-d \<-F 1) fiif test "X$class" = "x" thenclass=$ (grep-o "<td> print pin number </td><td>.*</td>" $ | c Ut-d \>-F 4 | Cut-d \<-F 1) fiif test "X$class" = "x" thenclass= "Barcode Printer" Fiif $ (echo $class | grep--quiet ' notes ') thenclass= "53618 7477 "Elif $ (echo $class | grep--quiet ' invoice ') thenclass=" 536187477 "Elif $ (echo $class | grep--quiet ' ticket ') thenclass=" 536 187477 "EliF $ (echo $class | grep--quiet ' passbook ') thenclass= "536187477" ########################################################### ###### #elif $ (echo $class | grep--quiet ' pin ') thenclass= "536187477" ################################################## ############### #elif $ (echo $class | grep--quiet ' bulb ') thenclass= "536187479" Elif $ (echo $class | grep--quiet ' UHE ') th enclass= "536187479" Elif $ (echo $class | grep--quiet ' UHP ') thenclass= "536187479" Elif $ (echo $class | grep--quiet ' HSCR ') thenclass= "536187479" ############################################################## #elif $ (echo $class | grep-- Quiet ' barcode printer ') thenclass= "536187480" ################################################################# #elif $ ( echo $class | grep--quiet ' card print ') thenclass= "536187483" ################################################################# #elif $ (Echo $class | grep--quiet ' barcode ') thenclass= "536187481" Elif $ (echo $class | grep--quiet ' scan ') thenclass= "536187481" elif $ (echo $class | grep--quiet ' reading ') Thenclass= "536187481" Elif $ (echo 
$class | grep--quiet ' acquisition ') thenclass= "536187481" Elif $ (echo $class | grep--quiet ' handheld ') then class= "536187481" Elif $ (echo $class | grep--quiet ' data terminal ') thenclass= "536187481" ##################################### ############################ #elif $ (echo $class | grep--quiet ' laser ') thenclass= "536187484" ########################### ###################################### #elif $ (echo $class | grep--quiet ' inkjet ') thenclass= "536187486" ################# ################################################ #elif $ (echo $class | grep--quiet ' copy ') thenclass= "536187615" ####### ########################################################## #elif $ (echo $class | grep--quiet ' All in one machine ') thenclass= " 536187485 "################################################################# #elif $ (echo $class | grep--quiet ' toner Cartridge ') thenclass= "536187616" Elif $ (echo $class | grep--quiet ' cartridges ') thenclass= "536187616" elseclass= "536187616" fi############ ################################################### #imagepath=$ (Find $basedir-type f-iname "*.jpg") if test "x$imagepath" = "x"; Then exit; fiimage=$ (md5sum $imagepath | cut-d "-F 1) CP-RF $imagepath $basedir/. /.. /template/$image. 
tbi############################################################### #desc =$ (cat $) ############# ################################################## #echo-e \ "$name \" "\ T" 110514 "\ T" \ ", $class, \" "\ T" 1 "\ T" \ "shanghai \" "\ T "\" "\" "\" "\" \ "b\" "\ T" $price "\ T" 0.000000 "\ T" 1 "\ T" 7 "\ T" 2 "\ T" 0.000000 "\ T" 0.000000 "\ T" 0.000000 "\ t" "\ T" "\ T" 1 "\ t" 1 "\ t "0" \ t "1" \ t "1" \ t "0" \ T "\" 2012-10-16 13:09:48\ "" \ T "" \ T "\" $desc \ "" \ T "" \ T "\" 20000:31140\;20196:3228846\;29969:107401 \;30681:32998\;31468:102250\;31479:92188\;3415558:27513\;3415563:21959\;3415571:21959\;3415581:10122\; 3415609:22041\;7884463:75957615\;14319244:80897641\;14319250:123483713\;14791484:10285019\;\ "\ t" "\ T" "\ T" 0 "\ T" 0 "\ t" \ "2012-10-16 13:37:51\" "\ T" of "\ T" "\ \" 0 "\ T" \ "$image: 0:0:\|\;\" \ \ "\" \ "\ t" \ "\" "\ t" \ "\" "\ t" \ "\" \ "\ \" \ "\" \ T "\" \ "" \ T "0" \ T "\" 15758222730\ "" \ T "15758222730
# Maps a category id (kept as a string) to its human-readable category name.
# The ids are the same values assigned to `class` by the shell script's
# keyword chain; 536187478 and 536187483 appear there only on
# commented-out-looking branches.
classtable = {
    "536187477": "Ticket Printer",
    "536187478": "Pin Printer",
    "536187479": "Projector Bulb",
    "536187480": "Barcode Printer",
    "536187481": "Barcode Equipment",
    "536187483": "Card Printer",
    "536187484": "Laser Printer",
    "536187485": "All-in-one machine",
    "536187486": "Inkjet Printer",
    "536187615": "Copy Composite Machine",
    "536187616": "Toner Cartridge",
}
Python: processing HTML table tags