Python 處理 HTML 的 table 標籤

來源:互聯網
上載者:User
import sys
import csv
import urllib2
import BeautifulSoup

# Python 2 script (urllib2 / BeautifulSoup 3).
#
# Usage: python <this script> PAGE.html
#
# Parses the HTML file named by the first command-line argument and re-emits
# the first <table> it finds on stdout as a bare table: one <tr> per source
# row and one <td> per source cell, with each cell's text UTF-8 encoded.
# Header cells (<th>) are not emitted.
#
# NOTE(review): presumably this is the "printtab.py" that process.sh invokes
# -- confirm the file name.
# NOTE: csv and urllib2 are not used by the active code path; the imports are
# kept because earlier variants of this script fetched the page by URL and
# wrote CSV rows.

# Read the whole page up front and close the handle deterministically.
with open(sys.argv[1]) as page:
    soup = BeautifulSoup.BeautifulSoup(page.read())

emit = sys.stdout.write

for table in soup.findAll('table'):
    emit("<table border='1'>\n")
    for row in table.findAll('tr'):
        emit("<tr>\n")
        for cell in row.findAll('td'):
            emit("<td>\n")
            # Cell text is written as-is (no HTML escaping), UTF-8 encoded.
            emit(cell.text.encode("utf-8") + "\n")
            emit("</td>\n")
        emit("</tr>\n")
    emit("</table>\n")
    # Only the first table on the page is wanted.
    break

#!/bin/bash
# process.sh (the original header comment read "process.h"; presumably a typo)
#
# Usage: process.sh PAGE.html
#
# Builds <dir of PAGE.html>/baobei.html containing:
#   - a table with the product name (from <title id="id_title">) and the
#     price (from <span class="s1">),
#   - the table printed by ./printtab.py for the same page,
#   - an <img> tag for the first entry of <dir>/imglist.
# If imglist has no usable first line, baobei.html is removed and the script
# exits. Finally all newlines and double quotes are stripped from the file.

basedir=$(dirname "$1")
echo "$basedir"
out="$basedir/baobei.html"

# Disabled in the original:
#echo \<head\>\<meta http-equiv=\"Content-Type\" content=\"text/html\; charset=UTF-8\" /\>\</head\> >> $basedir/baobei.html

# Text between <title id="id_title"> and </title>, cut at the first "_".
prodname=$(grep -o '<title id="id_title">.*</title>' "$1" | cut -d \> -f 2 | cut -d \< -f 1)
prodname=$(echo $prodname | cut -d _ -f 1)

# Digits inside <span class="s1">...</span>.
price=$(grep -o '<span class="s1">[0-9]*</span>' "$1" | cut -d \> -f 2 | cut -d \< -f 1)

# NOTE: $prodname and $price are expanded unquoted on purpose, exactly as in
# the original: word splitting flattens multiple matches / runs of whitespace
# into single spaces before they are written.
{
    echo '<table>'
    echo '<tr>'
    echo '<td>Name</td>'
    echo '<td>'$prodname'</td>'
    echo '</tr>'
    echo '<tr>'
    echo '<td>Price</td>'
    echo '<td>'$price'</td>'
    echo '</tr>'
    python ./printtab.py "$1"
    echo '</table>'
} > "$out"

imgsrc=$(head -n 1 "$basedir/imglist")
# Treat an empty or whitespace-only first line as "no image": drop the output.
if test -z "${imgsrc//[[:space:]]/}"; then
    rm -rf "$out" && exit
fi
echo "<img src='$imgsrc'/>" >> "$out"

# Collapse the file to a single line and strip double quotes.
tr -d '\n' < "$out" | tr -d '"' > "$out.tmp"
mv "$out.tmp" "$out"

#!/bin/bash
# process2.sh
#
# Usage: process2.sh PAGE.html
#
# Reads the Name / Price rows (and, if present, a product-type row) from the
# given single-line HTML file, maps the product type to a numeric category
# id, copies the product image next to the templates under its MD5 name, and
# prints one tab-separated record on stdout.
#
# Exits silently when the name, the price or a .jpg image cannot be found.
# $class may be preset through the environment; it is only looked up in the
# page when empty.

basedir=$(dirname "$1")

name=$(grep -o "<td>Name</td><td>.*</td>" "$1" | cut -d \> -f 4 | cut -d \< -f 1)
if test "x$name" = "x"; then
    exit
fi

price=$(grep -o "<td>Price</td><td>.*</td>" "$1" | cut -d \> -f 4 | cut -d \< -f 1)
if test "x$price" = "x"; then
    exit
fi

# Try the known row labels in order; the first one that yields a value wins.
for label in 產品類型 裝置類型 列印針數; do
    if test "x$class" = "x"; then
        class=$(grep -o "<td>$label</td><td>.*</td>" "$1" | cut -d \> -f 4 | cut -d \< -f 1)
    fi
done
if test "x$class" = "x"; then
    class="條碼印表機"
fi

# Map the textual type to a category id by substring match. Clause order is
# significant and mirrors the original elif chain (e.g. 條碼印表機 must be
# tested before the more general 條碼).
# NOTE(review): 針 maps to 536187477 here, although the classtable listed
# elsewhere in this file has a separate 536187478 entry -- confirm intent.
case "$class" in
    *票據*|*發票*|*票證*|*存摺*|*針*)
        class="536187477" ;;
    *燈泡*|*UHE*|*UHP*|*HSCR*)
        class="536187479" ;;
    *條碼印表機*)
        class="536187480" ;;
    *證卡列印*)
        class="536187483" ;;
    *條碼*|*掃描*|*閱讀*|*採集*|*手持*|*資料終端*)
        class="536187481" ;;
    *雷射*)
        class="536187484" ;;
    *噴墨*)
        class="536187486" ;;
    *複印*)
        class="536187615" ;;
    *一體機*)
        class="536187485" ;;
    *硒鼓*|*墨盒*|*)
        class="536187616" ;;
esac

# Use a single image: with several .jpg files the md5sum/cp steps below would
# receive multiple paths and fail. Which file comes first is up to find.
imagepath=$(find "$basedir" -type f -iname "*.jpg" | head -n 1)
if test "x$imagepath" = "x"; then
    exit
fi
image=$(md5sum "$imagepath" | cut -d ' ' -f 1)
cp -rf "$imagepath" "$basedir/../../template/$image.tbi"

desc=$(cat "$1")

# The record below expands $name, $desc, etc. unquoted so that word splitting
# squeezes embedded whitespace (including tabs, which would otherwise break
# the tab-separated layout). Disable pathname expansion so characters such as
# * or ? in the page text are not expanded against the working directory.
set -f
echo -e \"$name\""\t"110514"\t"\",$class,\""\t"1"\t"\"上海\""\t"\"上海\""\t"\"b\""\t"$price"\t"0.000000"\t"1"\t"7"\t"2"\t"0.000000"\t"0.000000"\t"0.000000"\t""\t""\t"1"\t"1"\t"0"\t"1"\t"1"\t"0"\t"\"2012-10-16 13:09:48\""\t""\t"\"$desc\""\t""\t"\"20000:31140\;20196:3228846\;29969:107401\;30681:32998\;31468:102250\;31479:92188\;3415558:27513\;3415563:21959\;3415571:21959\;3415581:10122\;3415609:22041\;7884463:75957615\;14319244:80897641\;14319250:123483713\;14791484:10285019\;\""\t""\t""\t"0"\t"0"\t"\"2012-10-16 13:37:51\""\t"100"\t""\t"0"\t"\"$image:0:0:\|\;\""\t"\"\""\t"\"\""\t"\",\""\t"\",\""\t"\"\""\t"\"\""\t"0"\t"\"15758222730\""\t"15758222730

 # Category id -> category display name.
 classtable = {
     "536187477": "票據印表機",
     "536187478": "針式印表機",
     "536187479": "投影燈泡",
     "536187480": "條碼印表機",
     "536187481": "條碼裝置",
     "536187483": "證卡印表機",
     "536187484": "雷射印表機",
     "536187485": "多功能一體機",
     "536187486": "噴墨印表機",
     "536187615": "複印複合機",
     "536187616": "硒鼓",
 }

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.