import sysimport csvimport urllib2import BeautifulSoup#page = urllib2.urlopen(sys.argv[1]).read()soup = BeautifulSoup.BeautifulSoup(open(sys.argv[1]).read())csvout = csv.writer(sys.stdout)for table in soup.findAll('table'): print "<table border='1'>" #print '#' #print '# Table' #print '# Fields: ' + ','.join([tr.text for tr in table.findAll('th')]) for row in table.findAll('tr'): print "<tr>" #csvout.writerow([tr.text for tr in row.findAll('td')]) for tr in row.findAll('td'): print "<td>" print tr.text.encode("utf-8") print "</td>" print "</tr>" print "</table>" break
#!/bin/bash#process.hbasedir=$(dirname $1)echo $basedir#echo \
#! /BN/bash # process2.shbasedir = $ (dirname $1) name = $ (grep-o "<TD> name </TD> <TD>. * </TD> "$1 | cut-d \>-F 4 | cut-d \ <-F 1) If test" x $ name "=" X "; thenexit; fiprice =rice (grep-o "<TD> price </TD> <TD>. * </TD> "$1 | cut-d \>-F 4 | cut-d \ <-F 1) If test" x $ price "=" X "; thenexit; fiif test "x $ class" = "X" thenclass = $ (grep-o "<TD> product type </TD> <TD>. * </TD> "$1 | cut-d \>-F 4 | cut-d \ <-F 1) fiif test "x $ class" = "X" thenclass = $ (grep-o "<TD> device type </TD> <TD>. * </TD> "$1 | cut-d \>-F 4 | cut-d \ <-F 1) fiif test "x $ class" = "X" thenclass = $ (grep-o "<TD> print the number of needles </TD> <TD>. * </TD> "$1 | cut-d \>-F 4 | cut-d \ <-F 1) fiif test "x $ class" = "X" thenclass = "barcode printer" fiif $ (echo $ class | grep -- Quiet 'bill ') thenclass = "536187477" Elif $ (echo $ class | grep -- Quiet 'invoiced ') thenclass = "536187477" Elif $ (echo $ class | grep -- Quiet 'ticket ') thenclass = "536187477" Elif $ (echo $ class | grep -- Quiet 'passbook ') thenclass = "536187477 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'needle ') thenclass = "536187477 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'lightbulp ') thenclass = "536187479" Elif $ (echo $ class | grep -- Quiet 'uhe ') thenclass = "536187479" Elif $ (echo $ class | grep -- Quiet 'uhp ') thenclass = "536187479" Elif $ (echo $ class | grep -- Quiet 'hscr ') thenclass = "536187479 "################################### ############################ Elif $ (echo $ class | grep -- Quiet 'bar code printer ') thenclass = "536187480 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'Card printing ') thenclass = "536187483 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'barbar ') thenclass = "536187481" Elif $ (echo $ class | grep -- Quiet 
'scan') thenclass = "536187481" Elif $ (echo $ class | grep -- Quiet 'read ') thenclass = "536187481" Elif $ (echo $ class | grep -- Quiet 'collection') thenclass = "536187481" Elif $ (echo $ class | grep -- Quiet 'handler ') thenclass = "536187481" Elif $ (echo $ class | grep -- Quiet 'data Terminal ') thenclass = "536187481 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'laser ') thenclass = "536187484 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'inkjet ') thenclass = "536187486 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'photocopied ') thenclass = "536187615 "################################### ############################## Elif $ (echo $ class | grep -- quiet '') thenclass = "536187485 "################################### ############################## Elif $ (echo $ class | grep -- quiet 'toner cartridges ') thenclass = "536187616" Elif $ (echo $ class | grep -- Quiet 'cartridge ') thenclass = "536187616" elseclass = "536187616" fi ############################# ################################### ImagePath = $ (find $ basedir-type F-INAME "*. jpg ") if test" x $ ImagePath "=" X "; then exit; fiimage =$ (md5sum $ ImagePath | cut-D''-F 1) CP-RF $ ImagePath $ basedir /.. /.. /template/$ image. 
traumatic Brain Injury ####################################### ######################### DESC = $ (cat $1) ######################################## ######################## echo-e \ "$ name \" "\ t" 110514 "\ t "\", $ class, \ "" \ t "1" \ t "\" Shanghai \ "" \ t "\" Shanghai \ "\ t" \ "B \" "\ t" $ price "\ t" 0.000000 "\ t" 1 "\ t" 7 "\ t" 2 "\ t" 0.000000 "\ t" 0.000000 "\ t" 0.000000 "\ t "" \ t "" \ t "1" \ t "1" \ t "0" \ t "1" \ t "1" \ t "0" \ t "\" 2012-10-16 13:09:48 \ "" \ t "" \ t "\" $ DESC \ "" \ t "" \ t "\" 20000:31140 \; 20196: 3228846 \; 29969: 107401 \; 30681: 32998 \; 31468: 102250 \; 31479: 92188 \; 3415558: 27513 \; 3415563: 21959 \; 3415571: 21959 \; 3415581: 10122 \; 3415609: 22041 \; 7884463: 75957615 \; 14319244: 80897641 \; 14319250: 123483713 \; 14791484: 10285019 \; \ "" \ t "0" \ t "0" \ t "\" 13:37:51 \ "" \ t "100" \ t "" \ t "0" \ t "\" $ image: 0: 0 :\|\; \ "" \ t "\" \ t "\ t "\", \ "" \ t "\", \ "" \ t "\" \ "" \ t "\" \ t "0" \ t "\" 15758222730 \ "" \ t "15758222730
# Lookup from numeric id (kept as a string) to a human-readable product
# category name.
# NOTE(review): these look like the category ids assigned to `class` by the
# shell classifier earlier in this paste -- confirm against the consumer.
# Two keys ("536187480", "536187485") had a stray trailing space inside the
# quotes; it is removed so every key is a bare 9-digit id like the rest.
Classtable = {
    "536187477": "Bill printer",
    "536187478": "dot matrix printer",
    "536187479": "projection bulb",
    "536187480": "barcode printer",
    "536187481": "barcode equipment",
    "536187483": "card printer",
    "536187484": "laser printer",
    "536187485": "All-in-One Machine",
    "536187486": "Inkjet Printer",
    "536187615": "copying Compound Machine",
    "536187616": "toner cartridge",
}