HTML tag closure detection and repair. The implementation is fairly long and does not cover every case. It does not use regular expressions. It applies to HTML in which a start tag has no matching end tag, or an end tag has no matching start tag. The position at which the missing tag is inserted may need to be adjusted to suit your needs.
<?php
// Sample input for the repair routine below.
// NOTE(review): the HTML markup that originally surrounded these text lines appears to
// have been stripped when this listing was extracted; only the text content survives.
// Replace this literal with real markup (including unbalanced tags) to exercise the code.
$str = '


Content



Content full

This is content
This is content
This is cont

This is content

This is content ';
/**
 * Scan an HTML string and collect the tags that could not be paired.
 *
 * The scan keeps a stack of start tags. A closing tag that matches the top of the
 * stack pops it; any other closing tag is recorded as an error. Self-closed tags
 * (ending in "/>") are dropped from the stack. Comments, "<!...>" declarations,
 * C-style block comments and quoted strings are skipped.
 *
 * @param string $str HTML text to scan.
 * @return array Four parallel lists:
 *               'pre_data'   => names of start tags still on the stack,
 *               'pre_pos'    => offset at which reading of each start tag's name stopped
 *                               (the ">", "/", or opening quote that ended the name),
 *               'error_data' => names of closing tags that did not match the stack top,
 *               'error_pos'  => offset of the ">" ending each such closing tag
 *                               (or the string length when the tag is unterminated).
 */
function scan_html_tags($str)
{
    $str_len = strlen($str);
    // Record start tag
    $pre_data = array();
    // Record start tag position
    $pre_pos = array();
    $error_data = array();
    $error_pos = array();
    // True while the scan is between a start tag's "<" and its closing ">" or "/>".
    $start_flag = false;
    $i = 0;

    while ($i < $str_len) {
        $char = $str[$i];
        // Look ahead without reading past the end of the string.
        $next = ($i + 1 < $str_len) ? $str[$i + 1] : '';
        $next_is_letter = ($next >= 'a' && $next <= 'z') || ($next >= 'A' && $next <= 'Z');

        if ($char == '<' && $next_is_letter) {
            // Start tag: the name runs up to the first whitespace, quote, "/" or ">".
            $i++;
            $tag_name = '';
            $start_flag = true;
            // Set once whitespace is seen, so attribute text is not added to the name.
            $space_flag = false;
            while ($i < $str_len
                && $str[$i] != '>' && $str[$i] != "'" && $str[$i] != '"' && $str[$i] != '/'
            ) {
                if (trim($str[$i]) === '') {
                    $space_flag = true;
                }
                if (!$space_flag) {
                    $tag_name .= $str[$i];
                }
                $i++;
            }
            $pre_data[] = $tag_name;
            $pre_pos[] = $i;
        } elseif ($char == '<' && $next == '/') {
            // Closing tag: read the name up to ">".
            $i += 2;
            $tag_name = '';
            while ($i < $str_len && $str[$i] != '>') {
                $tag_name .= $str[$i];
                $i++;
            }
            // View the previous value of the start tag
            if (count($pre_data) > 0 && end($pre_data) === $tag_name) {
                // Paired: drop the start tag from the stack.
                array_pop($pre_data);
                array_pop($pre_pos);
            } else {
                // Not paired: either a closing tag without a start tag, or the start
                // tag on top of the stack was never closed. sort_data() decides later.
                $error_data[] = $tag_name;
                $error_pos[] = $i;
            }
        } elseif ($char == '<' && $next == '!') {
            // "<!--" runs to "-->"; any other "<!" declaration (e.g. a doctype) runs
            // only to the next ">", so it cannot swallow the rest of the document.
            $is_comment = (substr($str, $i, 4) == '<!--');
            $terminator = $is_comment ? '-->' : '>';
            $end = strpos($str, $terminator, $i + 2);
            $i = ($end === false) ? $str_len : $end + strlen($terminator);
        } elseif ($char == '/' && $next == '>') {
            // Skip automatic single closed tag
            if ($start_flag) {
                array_pop($pre_data);
                array_pop($pre_pos);
            }
            // Always advance, otherwise a "/>" outside a start tag would loop forever.
            $start_flag = false;
            $i += 2;
        } elseif ($char == '/' && $next == '*') {
            // Block comment: skip to just past the closing "*/".
            $end = strpos($str, '*/', $i + 2);
            $i = ($end === false) ? $str_len : $end + 2;
        } elseif ($char == "'" || $char == '"') {
            // Quoted string: skip to just past the matching quote.
            $end = strpos($str, $char, $i + 1);
            $i = ($end === false) ? $str_len : $end + 1;
        } elseif ($char == '>') {
            // End of the current tag: a later "/>" no longer belongs to it.
            $start_flag = false;
            $i++;
        } else {
            $i++;
        }
    }

    return array(
        'pre_data' => $pre_data,
        'pre_pos' => $pre_pos,
        'error_data' => $error_data,
        'error_pos' => $error_pos,
    );
}

$str_len = strlen($str);
$scan_result = scan_html_tags($str);
// Record start tag
$pre_data = $scan_result['pre_data'];
// Record start tag position
$pre_pos = $scan_result['pre_pos'];
// Closing tags are resolved as soon as they are read, so this list stays empty.
$last_data = array();
$error_data = $scan_result['error_data'];
$error_pos = $scan_result['error_pos'];
/**
 * Determine where the closing tag for an unclosed start tag should be inserted.
 *
 * Starting at $pre_pos, the scan skips quoted attribute values, finds the ">" that
 * ends the start tag, then moves to the next "<" (or the end of the string) and
 * backs up over trailing whitespace so the closing tag lands right after the
 * element's text. It never backs up past the ">" itself.
 *
 * @param string $str     Text being repaired.
 * @param int    $pre_pos Offset inside the start tag to begin scanning from.
 * @return int Offset at which to insert the closing tag; the string length when no
 *             ">" is found.
 */
function confirm_pre_pos($str, $pre_pos)
{
    $str_len = strlen($str);
    $j = $pre_pos;
    while ($j < $str_len) {
        $char = $str[$j];
        if ($char == '"' || $char == "'") {
            // Skip a quoted attribute value, which may itself contain ">".
            $closing = strpos($str, $char, $j + 1);
            $j = ($closing === false) ? $str_len : $closing + 1;
        } elseif ($char == '>') {
            $content_start = $j + 1;
            $next_tag = strpos($str, '<', $content_start);
            $j = ($next_tag === false) ? $str_len : $next_tag;
            // Return to the original content location: step back over trailing
            // whitespace only, so content is never split and the tag is never entered.
            while ($j > $content_start && trim($str[$j - 1]) === '') {
                $j--;
            }
            break;
        } else {
            $j++;
        }
    }
    return $j;
}
/**
 * Determine where the start tag for a stray closing tag should be inserted.
 *
 * Scans backwards from just before $err_pos to the nearest ">" and returns the
 * offset right after it, i.e. the beginning of the text that precedes the closing
 * tag. Offset 0 itself is not examined, matching the original loop bound.
 *
 * Quotes are deliberately not treated specially: the characters between the previous
 * tag and the closing tag are text content, where a quote is an ordinary character.
 *
 * @param string $str     Text being repaired.
 * @param int    $err_pos Offset of the ">" that ends the stray closing tag.
 * @return int Offset at which to insert the start tag; 0 when no earlier ">" exists.
 */
function confirm_err_pos($str, $err_pos)
{
    // Clamp so an out-of-range offset cannot index past the end of the string.
    $j = min($err_pos, strlen($str)) - 1;
    while ($j > 0) {
        if ($str[$j] == '>') {
            return $j + 1;
        }
        $j--;
    }
    return 0;
}
/**
 * Return the $num-th element counted from the end of an array.
 *
 * @param array $arr Source array; its keys are ignored, only element order matters.
 * @param int   $num 1 for the last element, 2 for the one before it, and so on.
 * @return mixed The requested element; the first element when $num exceeds the
 *               array length; null when the array is empty or $num is below 1.
 */
function getLastNode(array $arr, $num)
{
    // Re-index so arrays with gaps in their keys (e.g. after unset) still work.
    $values = array_values($arr);
    $len = count($values);
    if ($len === 0 || $num < 1) {
        return null;
    }
    if ($len >= $num) {
        return $values[$len - $num];
    }
    return $values[0];
}
/**
 * Pair leftover closing tags with leftover start tags of the same name.
 *
 * Each stray closing tag is matched with the latest still-unmatched start tag that
 * has the same name and lies before it in the text. Matched entries are removed
 * from all four arrays (keys of the remaining entries are preserved), so only tags
 * that really need a counterpart inserted are left.
 *
 * @param array $pre_data   Names of unclosed start tags (modified in place).
 * @param array $pre_pos    Offsets of those start tags, same keys as $pre_data.
 * @param array $error_data Names of unmatched closing tags (modified in place).
 * @param array $error_pos  Offsets of those closing tags, same keys as $error_data.
 * @return void
 */
function sort_data(&$pre_data, &$pre_pos, &$error_data, &$error_pos)
{
    $matched_error_keys = array();
    // Used as a set: start-tag key => true.
    $matched_pre_keys = array();
    // Walk start tags from the last one backwards so the nearest candidate wins.
    $pre_keys = array_reverse(array_keys($pre_data));

    // Obtain the value to be deleted
    foreach ($error_data as $error_key => $tag_name) {
        foreach ($pre_keys as $pre_key) {
            if (isset($matched_pre_keys[$pre_key])) {
                continue;
            }
            if ($pre_data[$pre_key] !== $tag_name) {
                continue;
            }
            // A start tag that appears after the closing tag cannot be its partner.
            if ($pre_pos[$pre_key] >= $error_pos[$error_key]) {
                continue;
            }
            $matched_error_keys[] = $error_key;
            $matched_pre_keys[$pre_key] = true;
            break;
        }
    }

    // Delete the matched closing-tag entries
    foreach ($matched_error_keys as $error_key) {
        unset($error_pos[$error_key], $error_data[$error_key]);
    }
    // Delete the matched start-tag entries
    foreach (array_keys($matched_pre_keys) as $pre_key) {
        unset($pre_data[$pre_key], $pre_pos[$pre_key]);
    }
}
/**
 * Insert the missing counterpart for every unmatched tag and return the repaired text.
 *
 * Stray closing tags are handled first (a start tag is inserted for each), then
 * unclosed start tags (a closing tag is inserted for each).
 *
 * @param string $str        Text to repair.
 * @param array  $pre_data   Names of unclosed start tags.
 * @param array  $pre_pos    Offsets of those start tags in the original $str.
 * @param array  $error_data Names of closing tags that have no start tag.
 * @param array  $error_pos  Offsets of those closing tags in the original $str.
 * @return string The text with the missing tags inserted.
 */
function modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos)
{
    // Offset in the current (already modified) text => number of characters inserted
    // there, kept sorted by offset.
    $move_log = array();
    // Only data with closed tags
    $str = _modify_data_insert_all($str, $error_data, $error_pos, false, $move_log);
    // Only start tag data
    $str = _modify_data_insert_all($str, $pre_data, $pre_pos, true, $move_log);
    return $str;
}

/**
 * Insert one counterpart tag per entry of $tags, keeping $move_log up to date.
 *
 * @param string $str          Current text.
 * @param array  $tags         Tag names to insert counterparts for.
 * @param array  $positions    Offsets in the ORIGINAL text, same keys as $tags.
 * @param bool   $is_start_tag Passed through to insert_data() as its $type argument.
 * @param array  $move_log     Insertions made so far (current offset => length).
 * @return string The text after all insertions.
 */
function _modify_data_insert_all($str, $tags, $positions, $is_start_tag, &$move_log)
{
    foreach ($tags as $key => $tag_name) {
        // Translate the original offset into the current text by adding the length of
        // every insertion located at or before it. Walking the log in ascending order
        // and comparing against the running total keeps both sides of the comparison
        // in current-text coordinates.
        $shift = 0;
        foreach ($move_log as $logged_pos => $logged_count) {
            if ($logged_pos > $positions[$key] + $shift) {
                break;
            }
            $shift += $logged_count;
        }

        $data = insert_data($str, $tag_name, $positions[$key] + $shift, $is_start_tag);
        $str = $data['str'];

        // Earlier insertions at or after the new one have just moved right. Shifting
        // them also keeps every key unique, so no entry is overwritten.
        $updated_log = array($data['pos'] => $data['move_count']);
        foreach ($move_log as $logged_pos => $logged_count) {
            if ($logged_pos >= $data['pos']) {
                $logged_pos += $data['move_count'];
            }
            $updated_log[$logged_pos] = $logged_count;
        }
        ksort($updated_log);
        $move_log = $updated_log;
    }
    return $str;
}
/**
 * Insert the missing counterpart of one tag into the text.
 *
 * @param string $str         Text to modify.
 * @param string $insert_data Tag name, without angle brackets.
 * @param int    $pos         Offset in $str of the unmatched tag.
 * @param bool   $type        true: the tag is an unclosed start tag, so "</name>" is
 *                            inserted at the offset chosen by confirm_pre_pos();
 *                            false: the tag is a stray closing tag, so "<name>" is
 *                            inserted at the offset chosen by confirm_err_pos().
 * @return array 'str'        => the modified text,
 *               'pos'        => offset at which the tag was inserted,
 *               'move_count' => number of characters inserted.
 */
function insert_data($str, $insert_data, $pos, $type)
{
    if ($type == true) {
        // Start tag type
        $pos = confirm_pre_pos($str, $pos);
        $mid_str = '</' . $insert_data . '>';
    } else {
        // Closed tag type
        $pos = confirm_err_pos($str, $pos);
        $mid_str = '<' . $insert_data . '>';
    }

    return array(
        // Length 0 means nothing is replaced; the tag is inserted at $pos.
        'str' => substr_replace($str, $mid_str, $pos, 0),
        'pos' => $pos,
        // Measured from the inserted text so the two can never disagree.
        'move_count' => strlen($mid_str),
    );
}
// Pair up leftover tags that belong together, then insert what is still missing.
sort_data($pre_data, $pre_pos, $error_data, $error_pos);
$new_str = modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos);
echo $new_str;

// Debugging aids, disabled:
// print_r($pre_data);
// print_r($pre_pos);
// print_r($error_data);
// print_r($error_pos);
// echo strlen($str);
// foreach ($pre_pos as $value) {
//     $value = confirm_pre_pos($str, $value);
//     for ($i = $value - 5; $i <= $value; $i++) {
//         echo $str[$i];
//     }
//     echo "\n";
// }
// foreach ($error_pos as $value) {
//     for ($i = $value - 5; $i <= $value; $i++) {
//         echo $str[$i];
//     }
//     echo "\n";
// }
?>