Search engine indexing check program implemented in ASP.NET and ASP
Use ASP.NET or ASP to check whether an article is indexed by search engines such as Baidu, Google, and Sogou.
Implementation principle: search the engine directly for the URL of your article (without the protocol prefix; a URL that includes the protocol also works, since the code strips it automatically). If the URL is indexed, the search returns results; otherwise the engine reports that nothing was found.
ASP.NET source code for checking whether an article URL is indexed by Baidu, Google, and Sogou:
using System;
using System.Net;
using System.Text;
using System.IO;
using System.Web;

public class SearchEngineIndex
{
    // Search engine query addresses
    public static string[] urls = {
        "http://www.baidu.com/s?ie=utf-8&wd=",      // Baidu index check address
        "https://www.google.com.hk/search?q=",      // Google index check address
        "http://www.sogou.com/web?ie=utf8&query="   // Sogou index check address
    },
    // "No results" phrases shown when a URL is not indexed
    // (translated from the engines' Chinese result pages; they must match the live page text)
    noFindKeyword = {
        "sorry, not found with",
        "cannot find and your query",
        "not included?"
    };

    /// <summary>
    /// Get the response encoding from the Content-Type header
    /// </summary>
    /// <param name="contenttype"></param>
    /// <returns></returns>
    private static Encoding GetEncoding(string contenttype)
    {
        if (!string.IsNullOrEmpty(contenttype))
        {
            contenttype = contenttype.ToLower();
            if (contenttype.IndexOf("gb2312") != -1 || contenttype.IndexOf("gbk") != -1)
                return Encoding.GetEncoding(936);
            if (contenttype.IndexOf("big5") != -1)
                return Encoding.GetEncoding(950);
        }
        return Encoding.UTF8;
    }

    /// <summary>
    /// Fetch a page with HttpWebRequest, recognizing the character set automatically
    /// </summary>
    /// <param name="url"></param>
    /// <param name="addUseragent">Add a UserAgent header to avoid being blocked when fetching other sites</param>
    /// <returns></returns>
    public static string GetHtml(string url, bool addUseragent)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        if (addUseragent)
            request.UserAgent = "Googlebot|Feedfetcher-Google|Baiduspider";
        string html = null;
        try
        {
            HttpWebResponse response = (HttpWebResponse)request.GetResponse();
            StreamReader srd = new StreamReader(response.GetResponseStream(), GetEncoding(response.ContentType));
            html = srd.ReadToEnd();
            srd.Close();
            response.Close();
        }
        catch { }
        return html;
    }

    /// <summary>
    /// Check whether a URL is indexed by the given search engine
    /// </summary>
    /// <param name="url">url</param>
    /// <param name="engin">0: Baidu, 1: Google, 2: Sogou. Other engines such as Bing and 360
    /// do not expose their results in a directly fetchable page, so they are not checked</param>
    /// <returns></returns>
    public static bool CheckIndex(string url, int engin)
    {
        if (string.IsNullOrEmpty(url)) return false;
        if (engin < 0 || engin > 2) engin = 0;
        url = urls[engin] + HttpUtility.UrlEncode(url.ToLower().Replace("http://", "").Replace("https://", ""));
        bool r = true;
        string html = GetHtml(url, true);
        if (html == null || html.IndexOf(noFindKeyword[engin]) != -1)
            r = false;
        return r;
    }
}

// Example calls:
// SearchEngineIndex.CheckIndex("www.jb51.net/article/20101014/2902.aspx", 0);  // check the Baidu index
// SearchEngineIndex.CheckIndex("www.jb51.net/article/20101014/2902.aspx", 1);  // check the Google index
// SearchEngineIndex.CheckIndex("www.jb51.net/article/20101014/2902.aspx", 2);  // check the Sogou index
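For newer versions of .NET, here is a minimal sketch of the same check using HttpClient; it assumes .NET Framework 4.5+ or .NET Core with C# 7.1+ (for async Main), and reuses the Baidu query address and "no results" phrase from the class above. The class and method names are illustrative, not part of the original code.

using System;
using System.Net.Http;
using System.Threading.Tasks;

class IndexCheckSketch
{
    static readonly HttpClient client = new HttpClient();

    // True when the engine's result page does NOT contain its "no results" phrase.
    static async Task<bool> CheckIndexAsync(string engineQuery, string articleUrl, string noFindKeyword)
    {
        // Strip the protocol, exactly as the class above does
        string bare = articleUrl.ToLower().Replace("http://", "").Replace("https://", "");
        string html = await client.GetStringAsync(engineQuery + Uri.EscapeDataString(bare));
        return html.IndexOf(noFindKeyword, StringComparison.Ordinal) == -1;
    }

    static async Task Main()
    {
        bool indexed = await CheckIndexAsync(
            "http://www.baidu.com/s?ie=utf-8&wd=",
            "www.jb51.net/article/20101014/2902.aspx",
            "sorry, not found with");
        Console.WriteLine(indexed ? "indexed" : "not indexed");
    }
}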
ASP source code for checking whether an article URL is indexed by Baidu, Google, and Sogou:
<%
Class SearchEnginIndex
    dim urls, noFindKeyword

    private sub Class_Initialize
        ' Baidu, Google, and Sogou index query addresses
        urls = array("http://www.baidu.com/s?ie=utf-8&wd=", _
                     "https://www.google.com.hk/search?q=", _
                     "http://www.sogou.com/web?ie=utf8&query=")
        ' "No results" phrases shown when a URL is not indexed
        ' (translated from the engines' Chinese result pages; they must match the live page text)
        noFindKeyword = array("sorry, not found with", "cannot find and your query", "not included?")
    end sub

    private function GetEncoding(contenttype)
        contenttype = lcase(contenttype)
        if instr(contenttype, "gb2312") <> 0 or instr(contenttype, "gbk") <> 0 then
            GetEncoding = "gb2312"
        elseif instr(contenttype, "big5") <> 0 then
            GetEncoding = "big5"
        else
            GetEncoding = "utf-8"
        end if
    end function

    ' Convert binary stream data to a string in the given encoding
    private function BinToString(bin, encoding)
        dim obj
        set obj = Server.CreateObject("Adodb.Stream")
        obj.Type = 1 : obj.Mode = 3 : obj.Open
        obj.Write bin
        obj.Position = 0 : obj.Type = 2 : obj.Charset = encoding
        BinToString = obj.ReadText
        obj.Close : set obj = nothing
    end function

    public function GetHtml(url)
        dim xhr, encoding
        set xhr = Server.CreateObject("Microsoft.XMLHTTP")
        xhr.open "GET", url, false
        xhr.send
        encoding = GetEncoding(xhr.getResponseHeader("Content-Type"))
        Response.Charset = encoding
        GetHtml = BinToString(xhr.responseBody, encoding)
        set xhr = nothing
    end function

    public function CheckIndex(url, engin)
        if len(url) = 0 then exit function
        if engin < 0 or engin > 2 then engin = 0
        url = urls(engin) & Server.URLEncode(url)
        dim html
        html = GetHtml(url)
        CheckIndex = (instr(html, noFindKeyword(engin)) = 0)
    end function
End Class

set sei = new SearchEnginIndex
Response.Write sei.CheckIndex("www.jb51.net/article/20101014/2902.aspx", 0)  ' Baidu index
Response.Write sei.CheckIndex("www.jb51.net/article/20101014/2902.aspx", 1)  ' Google index
Response.Write sei.CheckIndex("www.jb51.net/article/20101014/2902.aspx", 2)  ' Sogou index
set sei = nothing
%>
Are static HTML pages indexed more easily than dynamic PHP or ASP pages?
Generally speaking, this claim is only partially correct.
Search engine spiders index pages by crawling links across the web. As long as some link points to the target page, the spider can reach it, whether the page is dynamic or static.
Static pages are widely believed to be indexed more readily than dynamic ones. The reason is crawl efficiency: a dynamic page takes extra server time to render on each request, so a spider can fetch fewer dynamic pages per unit of time.
On top of that, the URL of a dynamic page usually carries query parameters, which can mislead the spider and hurt indexing; the sketch after the reference below shows one way to hide such parameters.
Even so, we still recommend building dynamic sites with ASP or PHP, because they are much easier to maintain going forward.
Reference: www.redyz.com/newview.php?id=39
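On the parameter problem above: one common mitigation in ASP.NET is URL routing, which publishes a parameterized page under a static-looking, spider-friendly address. Below is a minimal sketch using ASP.NET 4.0 Web Forms routing (System.Web.Routing); the page name newview.aspx and the route pattern are illustrative assumptions, not from the original article.

using System;
using System.Web;
using System.Web.Routing;

public class Global : HttpApplication
{
    void Application_Start(object sender, EventArgs e)
    {
        // The spider sees /article/39 instead of /newview.aspx?id=39
        RouteTable.Routes.MapPageRoute(
            "ArticleRoute",     // route name
            "article/{id}",     // static-looking URL pattern
            "~/newview.aspx");  // the actual dynamic page
    }
}

// Inside newview.aspx.cs the parameter is then read as:
//   string id = (string)Page.RouteData.Values["id"];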
How does ASP.NET implement a search engine?
Network IO plus multithreading: several worker threads fetch pages concurrently, extract links from what they download, and index the content. See the sketch below.
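Here is a minimal, illustrative sketch of that "IO + multithreading" idea (assumes .NET 4+ for ConcurrentQueue); every name in it is an assumption, not the original answerer's design. Worker threads pull URLs from a shared queue and fetch them with blocking network IO.

using System;
using System.Collections.Concurrent;
using System.Net;
using System.Threading;

class CrawlerSketch
{
    static readonly ConcurrentQueue<string> queue = new ConcurrentQueue<string>();

    static void Worker()
    {
        string url;
        while (queue.TryDequeue(out url))
        {
            try
            {
                using (var wc = new WebClient())
                {
                    string html = wc.DownloadString(url);  // blocking network IO
                    Console.WriteLine("{0}: {1} chars", url, html.Length);
                    // A real crawler would parse links out of html here,
                    // enqueue the new URLs, and index the page content.
                }
            }
            catch (WebException) { /* skip unreachable pages */ }
        }
    }

    static void Main()
    {
        queue.Enqueue("http://www.baidu.com/");
        queue.Enqueue("http://www.sogou.com/");

        var threads = new Thread[4];
        for (int i = 0; i < threads.Length; i++)
            (threads[i] = new Thread(Worker)).Start();
        foreach (var t in threads)
            t.Join();
    }
}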