<?php
/**********************************************************************
 * Original file name: Filter1.php
 * File description:   Filters HTML strings.
 * Author:             xuefengal@sohu.com
 *
 * Process description:
 *   When the required parameters are passed to filter(), the function
 *   first uses preg_match_all() to collect every occurrence of the given
 *   tag in the string. It then loops over the preg_match_all() match
 *   array and uses preg_split() to split each tag into
 *   attribute="value" pairs. For every attribute name that is to be kept,
 *   the matching text is collected to build the replacement tag, and the
 *   corresponding tag in the string is then replaced with str_replace().
 *
 * Function list:
 *   function filter(&$str, $tag, $keep_attribute)
 *   function match($reg, &$str, $arr)
 *   function show($str, $title = '', $debug = True)
 *
 * Example:
 *   // Fetch the home page of Sohu News
 *   $str = @file_get_contents("http://news.sohu.com");
 *   // Filter
 *   filter($str, 'a', 'href,target,alt');
 *   filter($str, 'p', 'align');
 *   show($str, 'filtered content');
 **********************************************************************/
// Start the stopwatch for the run-time report printed at the end of the script.
$start_time = microtime(true);

// Sample input for the demonstration.
// NOTE(review): only the text of the original sample survived (its markup was
// lost), so the tags and attributes below are illustrative stand-ins chosen to
// exercise the three filter() calls - restore the original sample if available.
$str = <<<HTML
<a style="a" target="_blank" href="http://www.a.com" alt="site">Site</a> <a target="_blank" href="http://www.b.com" class="b">Site B</a> <a href="http://www.c.com" onclick="c()">Site c</a> <a href="http://www.d.com" alt="d d d">Site d</a> <a href="http://www.e.com">Site e</a>
<p align="center" class="x">Adasdfasdf</p>
<p align="left" style="margin:0">Asdfasdfasdfasdf</p>
<p id="third">Asdfasdfasdf</p>
<font color="red" size="2">Asdfadsfasdf</font> <font color="blue" face="Arial" alt="x">Asdfasdfadf</font> <font size="3">Asdfasdf</font>
HTML;

// Display the original string
show($str, 'HTML ');

// ---------------------------------------------------------------------------
// Filter: keep only the listed attributes on <a>, <p> and <font> tags.
// The lists are written without stray spaces so every name matches exactly.
filter($str, 'A', 'href,target,alt');
filter($str, 'P', 'align');
filter($str, 'font', 'color,alt');

// Display the filtered content
show($str, 'result ');

// Script running time
$run_time = microtime(true) - $start_time;
echo(' Script Run Time: ' . $run_time . ' ');
/**
 * Description: filters HTML strings by removing every attribute that is not
 * explicitly whitelisted from the opening tags of one element type.
 *
 * The string is modified in place. Only opening tags whose name is exactly
 * $tag (compared case-insensitively) are rewritten; closing tags, other
 * elements and all text content are left untouched. Kept attributes are
 * emitted in the order of the whitelist, using their original source text
 * (quoting and letter case preserved). Tags that cannot be parsed (for
 * example an unbalanced quote) are left unchanged. A progress line is echoed
 * on every call.
 *
 * Parameters:
 *   $str            string        The HTML string to be filtered (by reference).
 *   $tag            string        The element name whose tags are filtered, e.g. 'a'.
 *   $keep_attribute string|array  Attributes to be retained. Accepted formats:
 *                                   'href'
 *                                   'href,target,alt'
 *                                   array('href', 'target', 'alt')
 *                                 Names are trimmed and matched case-insensitively.
 */
function filter(&$str, $tag, $keep_attribute)
{
    // Accept both string forms and the array form of the whitelist. explode()
    // already yields a one-element array when there is no comma.
    if (!is_array($keep_attribute)) {
        $keep_attribute = explode(',', (string) $keep_attribute);
    }

    echo("· filter [$tag] tags, keep attributes:" . implode(',', $keep_attribute) . ' ');

    $tag = (string) $tag;
    if ($tag === '' || !is_string($str) || $str === '') {
        return;
    }

    // Normalise the whitelist: trimmed, lower-cased, no blanks, no duplicates.
    $keep = array();
    foreach ($keep_attribute as $name) {
        $name = strtolower(trim((string) $name));
        if ($name !== '' && !in_array($name, $keep, true)) {
            $keep[] = $name;
        }
    }

    // Match one opening tag at a time:
    //  - preg_quote() keeps a tag name with special characters from altering the pattern;
    //  - the look-ahead stops 'a' from also matching '<abbr ...>';
    //  - quoted values are consumed as a unit so a '>' inside them does not end the tag.
    $pattern = '/<(' . preg_quote($tag, '/') . ')(?=[\s\/>])((?:"[^"]*"|\'[^\']*\'|[^>"\'])*)>/i';
    if (!preg_match_all($pattern, $str, $tags, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
        return; // nothing to rewrite (or the pattern failed): leave $str untouched
    }

    // Rebuild the string by offset so each tag is rewritten exactly once, even
    // when the same tag text occurs several times.
    $result = '';
    $cursor = 0;
    foreach ($tags as $m) {
        $whole  = $m[0][0];
        $offset = $m[0][1];
        $attrs  = $m[2][0];

        // Keep an XHTML-style self-closing slash (' />') if the tag had one.
        $slash = preg_match('/(?:^|\s)\/\s*$/', $attrs) ? ' /' : '';

        $result .= substr($str, $cursor, $offset - $cursor);
        $result .= '<' . $m[1][0] . filter_keep_attributes($attrs, $keep) . $slash . '>';
        $cursor  = $offset + strlen($whole);
    }
    $result .= substr($str, $cursor);

    $str = $result;
}

/**
 * Description: builds the attribute part of a tag, retaining only the
 * attributes to be kept.
 *
 * This helper takes over the job of the former match() function; it was
 * renamed because "match" is a reserved keyword since PHP 8.0 and can no
 * longer be used as a function name.
 *
 * Parameters:
 *   $attribute_text string  Everything between the tag name and the closing '>'.
 *   $keep           array   Lower-cased attribute names to retain, in output order.
 * Return value:
 *   The retained attributes, each preceded by one space, e.g.
 *   ' href="http://www.e.com" target="_blank"'; '' when nothing is kept.
 */
function filter_keep_attributes($attribute_text, $keep)
{
    // Each match is: name, optionally followed by = and a double-quoted,
    // single-quoted or unquoted value. Values may therefore contain '=' or
    // spaces without confusing the split.
    $found = array();
    preg_match_all(
        '/([^\s=\/>"\']+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|[^\s"\'>]*))?/',
        (string) $attribute_text,
        $found,
        PREG_SET_ORDER
    );

    $kept = '';
    foreach ($keep as $name) {
        foreach ($found as $attribute) {
            // HTML attribute names are case-insensitive.
            if (strtolower($attribute[1]) === $name) {
                $kept .= ' ' . $attribute[0];
            }
        }
    }

    return $kept;
}
/**
 * Display string content (debug helper).
 *
 * Echoes "$title: $str" followed by a newline. Arrays are rendered with
 * print_r() first. Nothing is printed when $debug is false.
 *
 * NOTE(review): the source computed a line count ($txtRows) that was never
 * used in the output - presumably it sized markup that is no longer present.
 * The unused (and syntactically broken) computation is dropped here; confirm
 * against the original file if that markup should be restored.
 *
 * Parameters:
 *   $str   string|array  The value to display.
 *   $title string        Label printed in front of the value.
 *   $debug bool          When false the function prints nothing.
 */
function show($str, $title = '', $debug = True)
{
    if (!$debug) {
        return;
    }

    if (is_array($str)) {
        // Second argument makes print_r() return the dump instead of printing it.
        $str = print_r($str, true);
    }

    echo($title . ': ' . $str . "\n");
}
?>