<?php
// $document should contain an HTML document.
// This example strips HTML markup, JavaScript code and runs of whitespace
// that follow a line break. It also converts some common HTML entities
// to their text equivalents.
//
// The patterns below are applied in order, each one to the output of the
// previous one. They are written as single-quoted strings so that escapes
// such as \r, \n, \s and \d reach the regex engine untouched, and they use
// "~" as the delimiter so that "/" and "#" need no escaping.
$search = array(
    '~<script[^>]*?>.*?</script>~si', // remove JavaScript
    '~<[/!]*?[^<>]*?>~si',            // remove HTML tags
    '~([\r\n])[\s]+~',                // remove whitespace after a line break
    '~&(quot|#34);~i',                // replace HTML entities
    '~&(amp|#38);~i',
    '~&(lt|#60);~i',
    '~&(gt|#62);~i',
    '~&(nbsp|#160);~i',
    '~&(iexcl|#161);~i',
    '~&(cent|#162);~i',
    '~&(pound|#163);~i',
    '~&(copy|#169);~i',
);

// Replacements, position-for-position with $search.
// chr() returns a single byte, so the entries built with it are one-byte
// characters (ISO-8859-1 code points), not UTF-8 sequences.
// NOTE(review): presumably the document is in a single-byte encoding - confirm.
$replace = array(
    '',
    '',
    '$1',     // keep only the captured line-break character
    '"',
    '&',
    '<',
    '>',
    ' ',
    chr(161),
    chr(162),
    chr(163),
    chr(169),
);

$text = preg_replace($search, $replace, $document);

// Decode the remaining numeric entities (&#NNN;). This used to be done with
// the "e" (eval) pattern modifier, which no longer exists in PHP 7+, so a
// callback is used instead. It runs after the table above, which keeps the
// original ordering (the numeric rule was the last entry of $search).
// preg_replace() returns null on failure; in that case there is nothing to decode.
if ($text !== null) {
    $text = preg_replace_callback(
        '~&#(\d+);~',
        function (array $match) {
            // chr() only looks at the low 8 bits of its argument; masking
            // makes that explicit and keeps the argument within 0..255.
            return chr(((int) $match[1]) & 0xFF);
        },
        $text
    );
}
?>