Html tag closure detection and repair

Source: Internet
Author: User
Html tag closure detection and repair
This article presents PHP code for detecting and repairing unclosed HTML tags. The code is somewhat long and does not cover every case. It does not use regular expressions. It applies to HTML in which a start tag has no matching end tag, or an end tag has no matching start tag. The position at which the missing tag is inserted may need to be adjusted to suit your needs.

  <?php
  // Sample input to repair.
  // NOTE(review): the HTML tags of this sample appear to have been stripped when
  // the listing was published, so only the text content is left. Replace it with
  // real markup containing unbalanced tags to see the repair do anything.
  $str = '
Content
Content full
This is content
This is content
This is cont
This is content
This is content';
  $str_len = strlen($str);

  // Stack of start-tag names that have not been closed yet.
  $pre_data = array();
  // For each entry of $pre_data, the offset at which the scan of that tag's
  // name stopped (at ">", a quote, "/" or the end of the string).
  $pre_pos = array();
  // Kept so the variable still exists; the scan never leaves an entry in it.
  $last_data = array();
  // End tags that did not match the start tag on top of the stack, and the
  // offset of each one's ">" (or the string length if it was never terminated).
  $error_data = array();
  $error_pos = array();
  $i = 0;
  // True from the "<" of a start tag until that tag's ">" or "/>" is seen.
  $start_flag = false;

  while ($i < $str_len) {
      $ch = $str[$i];
      // Look ahead one character without reading past the end of the string.
      $next = ($i + 1 < $str_len) ? $str[$i + 1] : '';
      // A start tag must begin with a letter; this keeps a bare "<" in text
      // (for example "a < b") from being recorded as a tag with an empty name.
      $next_is_alpha = $next !== ''
          && stripos('abcdefghijklmnopqrstuvwxyz', $next) !== false;

      if ($ch == '<' && $next_is_alpha) {
          // Start tag: read up to the first ">", quote or "/" and keep the part
          // before the first whitespace character as the tag name.
          $i++;
          $start_flag = true;
          $span = strcspn($str, ">'\"/", $i);
          $head = (string) substr($str, $i, $span);
          $_tmp_str = (string) substr($head, 0, strcspn($head, " \t\r\n"));
          $i += $span;
          $pre_data[] = $_tmp_str;
          $pre_pos[] = $i;
      } else if ($ch == '<' && $next == '/') {
          // End tag: the name is everything up to ">", with padding removed so
          // that "</div >" still matches "<div>".
          $i += 2;
          $span = strcspn($str, '>', $i);
          $_tmp_str = trim((string) substr($str, $i, $span));
          $i += $span;
          if (count($pre_data) > 0 && getLastNode($pre_data, 1) === $_tmp_str) {
              // Matches the innermost open tag: the pair is balanced.
              array_pop($pre_data);
              array_pop($pre_pos);
          } else {
              // Either there is no open tag at all, or the innermost open tag
              // has a different name. Record it and sort it out after the scan.
              $error_data[] = $_tmp_str;
              $error_pos[] = $i;
          }
      } else if ($ch == '<' && $next == '!') {
          if (substr($str, $i, 4) === '<!--') {
              // Comment: skip to just past "-->".
              $end = strpos($str, '-->', $i + 2);
              $i = ($end === false) ? $str_len : $end + 3;
          } else {
              // Other declarations such as "<!DOCTYPE html>" end at the next ">".
              $end = strpos($str, '>', $i + 2);
              $i = ($end === false) ? $str_len : $end + 1;
          }
      } else if ($ch == '/' && $next == '>') {
          // Self-closing tag: it needs no end tag, so drop it from the stack.
          if ($start_flag) {
              array_pop($pre_data);
              array_pop($pre_pos);
          }
          $start_flag = false;
          // Always step over "/>", otherwise the loop would never advance when
          // this sequence appears outside a start tag.
          $i += 2;
      } else if ($ch == '/' && $next == '*') {
          // Block comment (inline script or style): skip to just past "*/".
          $end = strpos($str, '*/', $i + 2);
          $i = ($end === false) ? $str_len : $end + 2;
      } else if ($ch == "'" || $ch == '"') {
          // Quoted text: skip to just past the matching quote so that any "<"
          // or ">" inside it is not mistaken for markup.
          $end = strpos($str, $ch, $i + 1);
          $i = ($end === false) ? $str_len : $end + 1;
      } else {
          if ($ch == '>') {
              // The current tag is finished; a later "/>" does not belong to it.
              $start_flag = false;
          }
          $i++;
      }
  }
  // Determine where the end tag for an unclosed start tag should be inserted.
  //
  // $str     - the markup being repaired.
  // $pre_pos - an offset inside the start tag, before its closing ">".
  //
  // Scans forward to the ">" that ends the start tag, stepping over single- and
  // double-quoted attribute values so a ">" inside them is ignored, and then on
  // to the next "<".
  //
  // Returns the offset of that "<", so that inserting there puts the end tag
  // after all of the element's text. Returns strlen($str) when the start tag,
  // a quoted value, or the text after the tag runs to the end of the string.
  function confirm_pre_pos($str, $pre_pos) {
      $str_len = strlen($str);
      $j = min(max(0, $pre_pos), $str_len);
      while ($j < $str_len) {
          $ch = $str[$j];
          if ($ch == '"' || $ch == "'") {
              // Jump past the matching quote; an unterminated value consumes
              // the rest of the string.
              $close = strpos($str, $ch, $j + 1);
              if ($close === false) {
                  return $str_len;
              }
              $j = $close + 1;
          } else if ($ch == '>') {
              // End of the start tag: the insertion point is the next "<".
              // Returning the "<" itself (not the character before it) keeps
              // the inserted tag from splitting the text or the start tag.
              $next_tag = strpos($str, '<', $j + 1);
              return ($next_tag === false) ? $str_len : $next_tag;
          } else {
              $j++;
          }
      }
      return $j;
  }
  // Determine where the start tag for an orphan end tag should be inserted.
  //
  // $str     - the markup being repaired.
  // $err_pos - the offset of the ">" that terminates the orphan end tag.
  //
  // Scans backwards from just before $err_pos to the nearest earlier ">" and
  // returns the offset right after it, i.e. the start of the text that precedes
  // the orphan end tag. Returns 0 when no earlier ">" is found; as in the
  // original loop, offset 0 itself is not examined.
  //
  // Quotes are deliberately treated as ordinary characters here: walking
  // backwards from text content, the first ">" met is the end of the previous
  // tag, and an apostrophe in the text must not hide it.
  function confirm_err_pos($str, $err_pos) {
      // Clamp so an out-of-range offset cannot index outside the string.
      $j = min($err_pos, strlen($str)) - 1;
      while ($j > 0) {
          if ($str[$j] == '>') {
              return $j + 1;
          }
          $j--;
      }
      return 0;
  }
  // Return the $num-th element counted from the end of a list.
  //
  // $arr - a list with consecutive integer keys starting at 0.
  // $num - 1 for the last element, 2 for the one before it, and so on.
  //
  // When $num is larger than the list, the first element is returned.
  // Returns null for an empty list or when $num is less than 1, instead of
  // reading an offset that does not exist.
  function getLastNode(array $arr, $num) {
      $len = count($arr);
      if ($len == 0 || $num < 1) {
          return null;
      }
      if ($len > $num) {
          return $arr[$len - $num];
      }
      return $arr[0];
  }
  // Second pass over the scan results: cancel out end tags that were recorded
  // as orphans only because some other tag was still open in between.
  //
  // $pre_data / $pre_pos     - unclosed start-tag names and their offsets.
  // $error_data / $error_pos - orphan end-tag names and their offsets.
  // All four arrays are passed by reference and modified in place. Removed
  // entries are unset, so the remaining keys are not renumbered.
  //
  // Each orphan end tag is paired with the last still-unpaired start tag of
  // the same name that appears before it in the document. A start tag located
  // after the end tag is never used, because that pair would not enclose
  // anything and both tags would be left broken.
  function sort_data(&$pre_data, &$pre_pos, &$error_data, &$error_pos) {
      $rem_key_array = array();
      // Start-tag keys already paired, stored as array keys for a quick lookup.
      $rem_i_array = array();
      // Walk the actual keys, last first, so gaps in the keys are harmless.
      $open_keys = array_reverse(array_keys($pre_data));

      // Collect the pairs to remove.
      foreach ($error_data as $key => $value) {
          foreach ($open_keys as $i) {
              if (isset($rem_i_array[$i]) || $pre_data[$i] != $value) {
                  continue;
              }
              if (isset($pre_pos[$i], $error_pos[$key])
                  && $pre_pos[$i] >= $error_pos[$key]) {
                  // This start tag comes after the end tag; not a valid pair.
                  continue;
              }
              $rem_key_array[] = $key;
              $rem_i_array[$i] = true;
              break;
          }
      }

      // Drop the paired end tags.
      foreach ($rem_key_array as $_item) {
          unset($error_pos[$_item]);
          unset($error_data[$_item]);
      }
      // Drop the paired start tags.
      foreach (array_keys($rem_i_array) as $_item) {
          unset($pre_data[$_item]);
          unset($pre_pos[$_item]);
      }
  }
  // Insert the missing tags into the markup and return the repaired string.
  //
  // $str                     - the markup to repair.
  // $pre_data / $pre_pos     - unclosed start-tag names and their offsets;
  //                            each one gets an end tag inserted.
  // $error_data / $error_pos - orphan end-tag names and their offsets;
  //                            each one gets a start tag inserted.
  //
  // insert_data() picks the exact insertion point from the offset it is given.
  // The insertions are applied from the end of the document towards the
  // beginning, so text added for one tag never moves the recorded offset of a
  // tag that is still waiting to be processed and no shift log is needed.
  function modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos) {
      // Each job is array(offset, tag name, true for "insert an end tag").
      $jobs = array();
      // Orphan end tags: a start tag has to be added.
      foreach ($error_data as $key => $value) {
          $jobs[] = array($error_pos[$key], $value, false);
      }
      // Unclosed start tags: an end tag has to be added.
      foreach ($pre_data as $key => $value) {
          $jobs[] = array($pre_pos[$key], $value, true);
      }
      // Highest offset first.
      usort($jobs, function ($a, $b) {
          return $b[0] - $a[0];
      });
      foreach ($jobs as $job) {
          $data = insert_data($str, $job[1], $job[0], $job[2]);
          $str = $data['str'];
      }
      return $str;
  }
  // Insert one missing tag into the markup.
  //
  // $str         - the markup being repaired.
  // $insert_data - the tag name, without angle brackets.
  // $pos         - an offset belonging to the unbalanced tag; the real insertion
  //                point is derived from it by confirm_pre_pos() or
  //                confirm_err_pos().
  // $type        - true:  $pos belongs to an unclosed start tag, so the end tag
  //                       "</name>" is inserted;
  //                false: $pos belongs to an orphan end tag, so the start tag
  //                       "<name>" is inserted.
  //
  // Returns array('str' => new markup, 'pos' => offset where the tag was
  // inserted, 'move_count' => number of characters inserted).
  function insert_data($str, $insert_data, $pos, $type) {
      // Compare, do not assign: the start-tag branch must stay reachable.
      if ($type == true) {
          // "</" + name + ">" is three characters longer than the name.
          $mid_str = "</" . $insert_data . ">";
          $pos = confirm_pre_pos($str, $pos);
      } else {
          // "<" + name + ">" is two characters longer than the name.
          $mid_str = "<" . $insert_data . ">";
          $pos = confirm_err_pos($str, $pos);
      }
      $move_count = strlen($mid_str);
      $pre_str = (string) substr($str, 0, $pos);
      $end_str = (string) substr($str, $pos);
      $str = $pre_str . $mid_str . $end_str;
      return array('str' => $str, 'pos' => $pos, 'move_count' => $move_count);
  }
  // Pair up the leftovers of the scan, insert whatever is still missing, and
  // print the repaired markup.
  sort_data($pre_data, $pre_pos, $error_data, $error_pos);
  $new_str = modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos);
  echo $new_str;

  // Debugging aids, left disabled: dump the scan results and show the few
  // characters leading up to each recorded offset.
  // print_r($pre_data);
  // print_r($pre_pos);
  // print_r($error_data);
  // print_r($error_pos);
  // echo strlen($str);
  // foreach ($pre_pos as $value) {
  //     $value = confirm_pre_pos($str, $value);
  //     for ($i = $value - 5; $i <= $value; $i++) {
  //         echo $str[$i];
  //     }
  //     echo "\n";
  // }
  // foreach ($error_pos as $value) {
  //     for ($i = $value - 5; $i <= $value; $i++) {
  //         echo $str[$i];
  //     }
  //     echo "\n";
  // }
  ?>

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.