PHP: fixing garbled text (mojibake) when loading HTML into the DOM, with example code
Preface
DOM is a relatively new PHP class library for processing XML and HTML; it lets you work with the DOM tree as conveniently as you would in JavaScript. Plenty of material about XML processing is already available online, so this article focuses on one specific problem: garbled (mis-decoded) text when a page is loaded into PHP's DOM. Without further ado, let's look at the solution below.
The solution is as follows:
/**
 * Fetch a page over HTTP and normalise its charset declaration to UTF-8.
 *
 * NOTE(review): this listing was recovered from a copy in which the HTML tag
 * literals inside the regex patterns and replacement strings had been
 * stripped and stray spaces/capitals had been inserted. The `<meta ...>` and
 * `<head ...>` literals below are a reconstruction consistent with the
 * surviving `charset="?gb2312"` and `[^>]*>` fragments -- confirm them against
 * the original article before relying on the exact patterns.
 *
 * @param string $url Address of the page to request.
 * @return string|false The page body on HTTP 200 (converted to UTF-8 when the
 *                      page declares gb2312); false on any other status or
 *                      when the transfer itself fails.
 */
function curl_get($url)
{
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // Return the body from curl_exec() instead of echoing it; the code below
    // needs the markup as a string.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Follow 302 redirects.
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0');
    curl_setopt($curl, CURLOPT_REFERER, $url);

    $data = curl_exec($curl);
    // HTTP status code of the final response.
    $code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // curl_exec() returns false when the transfer fails.
    if (200 != $code || !is_string($data)) {
        return false;
    }

    $utf8_meta   = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
    $gb2312_meta = '#<meta[^>]*charset="?gb2312"[^>]*>#is';

    // Fix garbled text: a page that declares gb2312 is transcoded to UTF-8 and
    // its declaration is rewritten so the markup and the bytes agree.
    if (preg_match($gb2312_meta, $data)) {
        $converted = iconv('gb2312', 'UTF-8//IGNORE', $data);
        // Keep the untouched page (and its original declaration) when the
        // conversion fails, rather than continuing with a non-string value.
        if (false !== $converted) {
            $data = preg_replace($gb2312_meta, $utf8_meta, $converted);
        }
    }

    // A page with no charset declaration at all gets a UTF-8 one, so that the
    // HTML parser is not left to guess the encoding.
    if (!preg_match('#<meta[^>]*charset=#is', $data)) {
        // `(?:\s[^>]*)?` keeps <header> from being mistaken for <head>.
        $head_open = '#<head(?:\s[^>]*)?>#is';
        if (preg_match($head_open, $data)) {
            // Place the declaration directly after the first opening <head>.
            $data = preg_replace($head_open, '$0' . "\n" . $utf8_meta, $data, 1);
        } else {
            // No <head>: put the declaration ahead of all other content.
            $data = $utf8_meta . "\n" . $data;
        }
    }

    return $data;
}
/**
 * Download a page and parse it into a DOMDocument.
 *
 * @param string $url Address of the page to load.
 * @return DOMDocument|false The parsed document, or false when the request
 *                           fails, the body is empty, or the markup cannot
 *                           be loaded.
 */
function getDom($url)
{
    $html_content = curl_get($url);
    if (empty($html_content)) {
        // saveLog($url, 'request failed');
        return false;
    }

    $dom = new DOMDocument('1.0', 'utf-8');
    // Keep libxml's parse errors in its internal buffer instead of raising a
    // PHP warning for each one.
    libxml_use_internal_errors(true);
    if (!$dom->loadHTML($html_content)) {
        return false;
    }

    return $dom;
}
// Re-encode the fetched markup from GB2312 to UTF-8 using mbstring.
// NOTE(review): presumably an alternative to the iconv()-based fix, meant to
// run on $html_content before $dom->loadHTML() in getDom(); it assumes the
// page really is GB2312-encoded -- confirm before applying it unconditionally.
$html_content = mb_convert_encoding($html_content, 'UTF-8', 'gb2312');