The scraped data contains '<>' HTML tags: <span class='wmojpqm2azpqma'> Research <span class='WMOJPQM2AZHQMQ'> The earliest and <span class='WmoJPQM2AzxQNw'> A <span class='Wmojpqm2azdqoa'> teaching as one of the modernization of <span class='Wmojpqm2azhqoa'> Comprehensive <span class='WMOJPQM2AZHQMQ'> Division from the Sex Provincial hospital
Here it is enough to remove everything enclosed in '<>', which can be done like this:
dr = re.compile(r'<[^>]+>', re.S)
dd = dr.sub('', html)
Complete Python script. The first function removes the parentheses from a field; the second function removes all tags from the HTML.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Clean-up script for the `hospital` table:
#   * update_level()  strips the parentheses from the `level` column
#   * update_detail() strips every HTML tag from the `details` column
#
# The connection and cursor are created at import time and shared by both
# functions through the module-level names `conn` and `cur`.
import MySQLdb
import sys
import re

conn = MySQLdb.connect(host='127.0.0.1', user='user', passwd='123456',
                       db='hospital', charset='utf8')
cur = conn.cursor()


def update_level():
    """Remove the parentheses from every `level` value of the form '(...)'.

    Selects the rows whose `level` matches the SQL pattern '(%)', deletes
    all '(' and ')' characters from the value and writes it back using a
    parameterized UPDATE. The changes are committed once all rows have
    been processed.
    """
    cur.execute("select id,level from hospital where level like '(%)'")
    sql = "update hospital set level=%s where id=%s"
    # fetchall() materializes the result set, so reusing `cur` for the
    # UPDATE statements inside the loop is safe.
    for sid, level in cur.fetchall():
        ii = re.sub(r'\(|\)', '', level)
        # Single-argument print gives the same output on Python 2 and 3.
        print("%s %s" % (sid, ii))
        cur.execute(sql, [ii, sid])
    # MySQLdb connections start with autocommit disabled; without an
    # explicit commit the updates are rolled back on transactional tables.
    conn.commit()


def update_detail():
    """Strip all HTML tags from the `details` column of every row.

    A tag is anything matching '<[^>]+>'. Rows whose `details` is NULL
    are skipped because there is nothing to clean. The changes are
    committed once all rows have been processed.
    """
    dr = re.compile(r'<[^>]+>', re.S)
    cur.execute("select id,details from hospital")
    sql = "update hospital set details=%s where id=%s"
    for did, detail in cur.fetchall():
        if detail is None:
            # NULL column value: dr.sub() would raise TypeError on None.
            continue
        dd = dr.sub('', detail)
        cur.execute(sql, (dd, did))
        print("finished %s" % did)
    conn.commit()


def main():
    """Run the selected clean-up step, then release the database handles."""
    try:
        # update_level()
        update_detail()
    finally:
        # Always release the cursor and connection, even if a step fails.
        cur.close()
        conn.close()


if __name__ == '__main__':
    main()