HTML tag closure detection and Repair

Source: Internet
Author: User
This script detects and repairs unclosed HTML tags. It is somewhat long and does not cover every case, and it does not use regular expressions. It handles two situations in an HTML document: a start tag that has no matching end tag, and an end tag that has no matching start tag. The position at which the missing tag is inserted may need to be adjusted to suit your needs.
  // NOTE(review): the opening PHP tag of this listing was lost when the page
  // was extracted; this code has to be placed after one before it can run.

  // Sample input. The markup of the original sample was stripped by the page
  // extraction (only its text lines survived), so the tags below are a
  // reconstruction. It exercises the interesting cases: unclosed start tags
  // (<p>, <b>), an orphan end tag (</em>), and an outer <div> whose end tag
  // cannot be matched during the scan because unclosed tags sit in between.
  $str = implode("\n", array(
      '',
      '<div class="main">',
      '<p>Content',
      '<div>Content full</div>',
      '<span>This is content</span>',
      'This is content</em>',
      '<b>This is cont',
      '<p>This is content</p>',
      'This is content</div>',
  ));
  $str_len = strlen($str);

  // Names of start tags that are still waiting for their end tag (a stack).
  $pre_data = array();
  // For each entry of $pre_data: the offset at which scanning of that start
  // tag stopped (its '>', a quote, a '/', or the end of the input).
  $pre_pos = array();
  // Kept so the variable still exists for other code; the scan never leaves
  // anything in it.
  $last_data = array();
  // Names of end tags that did not match the innermost open start tag.
  $error_data = array();
  // For each entry of $error_data: the offset of the '>' ending that end tag
  // (or the input length when the '>' is missing).
  $error_pos = array();

  // Elements that never take an end tag; they must not be reported as unclosed.
  $void_tags = array(
      'area' => true, 'base' => true, 'br' => true, 'col' => true,
      'embed' => true, 'hr' => true, 'img' => true, 'input' => true,
      'link' => true, 'meta' => true, 'param' => true, 'source' => true,
      'track' => true, 'wbr' => true,
  );

  $i = 0;
  // True while the scanner is between the '<' and the '>' of a start tag.
  $start_flag = false;
  // True when the start tag currently being scanned was pushed on $pre_data.
  $tag_recorded = false;

  while ($i < $str_len) {
      $ch = $str[$i];
      // Look-ahead character; empty at the end of the input so that no
      // comparison below reads past the string.
      $next = ($i + 1 < $str_len) ? $str[$i + 1] : '';

      if ($ch == '<' && ctype_alpha($next)) {
          // Start tag. Only a '<' followed by a letter counts, so a bare '<'
          // in text (as in "a < b") is not mistaken for a tag.
          $i++;
          $_tmp_str = '';
          $space_flag = false;
          // The name ends at the first whitespace, but the scan continues up
          // to the '>', a quote or a '/', which the outer loop handles next.
          while ($i < $str_len && $str[$i] != '>' && $str[$i] != "'"
                  && $str[$i] != '"' && $str[$i] != '/') {
              if (ctype_space($str[$i])) {
                  $space_flag = true;
              }
              if (!$space_flag) {
                  $_tmp_str .= $str[$i];
              }
              $i++;
          }
          $start_flag = true;
          $tag_recorded = !isset($void_tags[strtolower($_tmp_str)]);
          if ($tag_recorded) {
              $pre_data[] = $_tmp_str;
              $pre_pos[] = $i;
          }
      } else if ($ch == '<' && $next == '/') {
          // End tag: read the name up to the '>' and leave $i on that '>'.
          $end = strpos($str, '>', $i + 2);
          if ($end === false) {
              $end = $str_len;
          }
          $_tmp_str = trim(substr($str, $i + 2, $end - $i - 2));
          $i = $end;

          $open_count = count($pre_data);
          if ($open_count > 0 && $pre_data[$open_count - 1] == $_tmp_str) {
              // Matches the innermost open start tag: the pair is complete.
              array_pop($pre_data);
              array_pop($pre_pos);
          } else {
              // Either this end tag has no start tag, or start tags opened
              // in between were never closed. Sort_data() tells them apart.
              $error_data[] = $_tmp_str;
              $error_pos[] = $i;
          }
      } else if ($ch == '<' && $next == '!') {
          // Comments run to '-->'; any other declaration (for example a
          // doctype) ends at the next '>'. An unterminated one consumes the
          // rest of the input.
          if (substr($str, $i, 4) == '<!--') {
              $end = strpos($str, '-->', $i + 2);
              $i = ($end === false) ? $str_len : $end + 3;
          } else {
              $end = strpos($str, '>', $i + 2);
              $i = ($end === false) ? $str_len : $end + 1;
          }
      } else if ($ch == '/' && $next == '>' && $start_flag) {
          // Self-closing tag: it needs no end tag, so drop it again.
          if ($tag_recorded) {
              array_pop($pre_data);
              array_pop($pre_pos);
          }
          $start_flag = false;
          $tag_recorded = false;
          $i += 2;
      } else if ($ch == '/' && $next == '*') {
          // Skip a block comment (as found in inline styles and scripts).
          $end = strpos($str, '*/', $i + 2);
          $i = ($end === false) ? $str_len : $end + 2;
      } else if ($ch == "'" || $ch == '"') {
          // Skip a quoted run so that '<' and '>' inside attribute values
          // are ignored. This also applies to quotes in plain text, so a
          // lone apostrophe there hides everything up to the next one.
          $end = strpos($str, $ch, $i + 1);
          $i = ($end === false) ? $str_len : $end + 1;
      } else {
          if ($ch == '>') {
              $start_flag = false;
              $tag_recorded = false;
          }
          $i++;
      }
  }
  /**
   * Find the offset at which the end tag for an unclosed start tag belongs.
   *
   * Scanning starts at $pre_pos, skips quoted attribute values, and stops at
   * the '>' that ends the start tag. The insertion point is then the next
   * '<' (or the end of the input when there is none), moved back over any
   * whitespace that directly precedes it, so the end tag lands right after
   * the element's text rather than after a trailing line break.
   *
   * @param string $str     The markup being repaired.
   * @param int    $pre_pos Offset inside the start tag to begin scanning at.
   * @return int Offset at which the end tag should be inserted. When no '>'
   *             is found this is the offset where the scan ran out.
   */
  function Confirm_pre_pos($str, $pre_pos) {
      $str_len = strlen($str);
      $j = $pre_pos;
      while ($j < $str_len) {
          $ch = $str[$j];
          if ($ch == '"' || $ch == "'") {
              // Jump past the matching quote; an unterminated quote runs to
              // the end of the input.
              $close = strpos($str, $ch, $j + 1);
              $j = ($close === false) ? $str_len : $close + 1;
          } else if ($ch == '>') {
              $content_start = $j + 1;
              $next_tag = strpos($str, '<', $content_start);
              $j = ($next_tag === false) ? $str_len : $next_tag;
              // Never step back past the '>' itself: with no content at all
              // the end tag goes directly after the start tag.
              while ($j > $content_start && ctype_space($str[$j - 1])) {
                  $j--;
              }
              break;
          } else {
              $j++;
          }
      }
      return $j;
  }
  /**
   * Find the offset at which the start tag for an orphan end tag belongs.
   *
   * Scans backwards from just before $err_pos to the nearest '>' and returns
   * the offset right after it, i.e. the beginning of the text that leads up
   * to the orphan end tag. When no '>' is found the result is 0.
   *
   * Quotes are deliberately not treated specially: the quote-skipping loops
   * of the earlier version were bounded by an undefined variable and never
   * ran, so this keeps the behaviour callers actually got, without the
   * undefined-variable warning.
   *
   * @param string $str     The markup being repaired.
   * @param int    $err_pos Offset of the '>' that ends the orphan end tag.
   * @return int Offset (never negative) at which to insert the start tag.
   */
  function Confirm_err_pos($str, $err_pos) {
      // Clamp so an offset beyond the input cannot index past the string.
      $j = min($err_pos, strlen($str)) - 1;
      while ($j > 0) {
          if ($str[$j] == '>') {
              // Insert after the previous tag, not inside it.
              $j++;
              break;
          }
          $j--;
      }
      return max($j, 0);
  }
  /**
   * Return the element $num places from the end of a list.
   *
   * $num = 1 is the last element, $num = 2 the one before it, and so on.
   * When the list holds fewer than $num elements the first element is
   * returned instead.
   *
   * @param array $arr List with consecutive integer keys starting at 0.
   * @param int   $num Distance from the end, counted from 1.
   * @return mixed The selected element, or null when there is none (empty
   *               list, or a $num below 1).
   */
  function Getlastnode(array $arr, $num) {
      $len = count($arr);
      $index = ($len > $num) ? $len - $num : 0;
      // isset() guards the empty list and out-of-range $num, which would
      // otherwise read an undefined offset.
      return isset($arr[$index]) ? $arr[$index] : null;
  }
  /**
   * Pair up unmatched end tags with earlier unclosed start tags.
   *
   * The scan only compares an end tag with the innermost open start tag, so
   * an end tag whose start tag lies further out is first reported as an
   * error. This pass looks backwards through the open start tags for each
   * such end tag and, when it finds one with the same name that is not yet
   * paired and that appears before the end tag, removes both from the
   * lists. What remains afterwards really is missing its counterpart.
   *
   * All four arrays are modified in place; removed entries leave gaps in
   * the keys (the arrays are not re-indexed).
   *
   * @param array $pre_data   Names of unclosed start tags.
   * @param array $pre_pos    Offsets belonging to $pre_data (same keys).
   * @param array $error_data Names of unmatched end tags.
   * @param array $error_pos  Offsets belonging to $error_data (same keys).
   * @return void
   */
  function Sort_data(&$pre_data, &$pre_pos, &$error_data, &$error_pos) {
      // Innermost (latest) start tag first. The key lists are snapshots, so
      // unsetting entries while looping is safe.
      $open_keys = array_reverse(array_keys($pre_data));

      foreach (array_keys($error_data) as $err_key) {
          foreach ($open_keys as $open_key) {
              if (!isset($pre_data[$open_key])) {
                  // Already paired with an earlier end tag.
                  continue;
              }
              if ($pre_data[$open_key] != $error_data[$err_key]) {
                  continue;
              }
              // A start tag cannot be closed by an end tag that precedes it.
              if (isset($pre_pos[$open_key], $error_pos[$err_key])
                      && $pre_pos[$open_key] > $error_pos[$err_key]) {
                  continue;
              }
              unset($pre_data[$open_key], $pre_pos[$open_key]);
              unset($error_data[$err_key], $error_pos[$err_key]);
              break;
          }
      }
  }
  /**
   * Insert every missing tag and return the repaired markup.
   *
   * For each orphan end tag a start tag is inserted at the offset chosen by
   * Confirm_err_pos(); for each unclosed start tag an end tag is inserted at
   * the offset chosen by Confirm_pre_pos().
   *
   * All insertion offsets are computed against the unmodified input and the
   * insertions are then applied from the highest offset down. Text inserted
   * further back never moves an offset that is still to be used, so no
   * running shift has to be tracked. Tags that share an offset are emitted
   * in the order they were collected: start tags for orphan end tags first,
   * then end tags for unclosed start tags.
   *
   * @param string $str        The markup to repair.
   * @param array  $pre_data   Names of unclosed start tags.
   * @param array  $pre_pos    Offsets belonging to $pre_data (same keys).
   * @param array  $error_data Names of orphan end tags.
   * @param array  $error_pos  Offsets belonging to $error_data (same keys).
   * @return string The markup with the missing tags inserted.
   */
  function Modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos) {
      // Insertion offset => text to insert there.
      $insert_at = array();

      // End tags that have no start tag: add the start tag.
      foreach ($error_data as $key => $value) {
          $pos = Confirm_err_pos($str, $error_pos[$key]);
          if (!isset($insert_at[$pos])) {
              $insert_at[$pos] = '';
          }
          $insert_at[$pos] .= '<' . $value . '>';
      }

      // Start tags that have no end tag: add the end tag.
      foreach ($pre_data as $key => $value) {
          $pos = Confirm_pre_pos($str, $pre_pos[$key]);
          if (!isset($insert_at[$pos])) {
              $insert_at[$pos] = '';
          }
          $insert_at[$pos] .= '</' . $value . '>';
      }

      // Highest offset first, so the offsets still to come stay valid.
      krsort($insert_at, SORT_NUMERIC);
      foreach ($insert_at as $pos => $text) {
          $str = substr_replace($str, $text, $pos, 0);
      }
      return $str;
  }
  /**
   * Insert one missing tag into the markup.
   *
   * @param string $str         The markup to modify.
   * @param string $insert_data Name of the tag to insert.
   * @param int    $pos         Offset of the tag that lacks its counterpart;
   *                            the actual insertion offset is derived from
   *                            it by Confirm_pre_pos() or Confirm_err_pos().
   * @param bool   $type        True: $insert_data is an unclosed start tag,
   *                            so its end tag is inserted. False: it is an
   *                            orphan end tag, so its start tag is inserted.
   * @return array 'str' => the modified markup, 'pos' => the offset at which
   *               the tag was inserted, 'move_count' => the number of
   *               characters inserted.
   */
  function Insert_data($str, $insert_data, $pos, $type) {
      if ($type == true) {
          // Unclosed start tag: add the end tag after its content.
          $pos = Confirm_pre_pos($str, $pos);
          $mid_str = '</' . $insert_data . '>';
      } else {
          // Orphan end tag: add the start tag before its content.
          $pos = Confirm_err_pos($str, $pos);
          $mid_str = '<' . $insert_data . '>';
      }
      // Name length + 3 for an end tag, + 2 for a start tag.
      $move_count = strlen($mid_str);
      $str = substr_replace($str, $mid_str, $pos, 0);
      return array('str' => $str, 'pos' => $pos, 'move_count' => $move_count);
  }
  // Pair up orphan end tags with earlier unclosed start tags, then insert
  // the tags that are still missing and print the repaired markup.
  Sort_data($pre_data, $pre_pos, $error_data, $error_pos);
  $new_str = Modify_data($str, $pre_data, $pre_pos, $error_data, $error_pos);
  echo $new_str;

  // Debug output, disabled. The loops below were already commented out in
  // the listing (their closing braces still carried the comment marker).
  // NOTE(review): the print_r/strlen lines are assumed to have been
  // commented out as well - confirm before relying on the exact output.
  // print_r($pre_data);
  // print_r($pre_pos);
  // print_r($error_data);
  // print_r($error_pos);
  // echo strlen($str);
  // foreach ($pre_pos as $value) {
  //     $value = Confirm_pre_pos($str, $value);
  //     for ($i = $value - 5; $i <= $value; $i++) {
  //         echo $str[$i];
  //     }
  //     echo "\n";
  // }
  // foreach ($error_pos as $value) {
  //     for ($i = $value - 5; $i <= $value; $i++) {
  //         echo $str[$i];
  //     }
  //     echo "\n";
  // }
  ?>
Copy Code
  • Related Article

    Contact Us

    The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

    If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

    A Free Trial That Lets You Build Big!

    Start building with 50+ products and up to 12 months usage for Elastic Compute Service

    • Sales Support

      1 on 1 presale consultation

    • After-Sales Support

      24/7 Technical Support 6 Free Tickets per Quarter Faster Response

    • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.