This simple PHP search-engine source code requires the PHP cURL extension to be enabled. What it does: crawl a website, obtain the site's basic information, and extract all of the links on the page.
<?php
class Engine {

    private $_url = '';      // URL to crawl
    private $_sites = '';    // collected page information

    public function __construct($url) {
        $this->_url = $url;
    }

    // Start the engine
    public function start() {
        // $content = $this->socketOpen($this->_url);
        $content = $this->getContent($this->_url);
        $this->_sites['url']   = $this->_url;
        $this->_sites['meta']  = $this->getMeta($content);
        $this->_sites['title'] = $this->getTitle($content);
        // $this->_sites['detail'] = $this->getDetail($content);
        $this->_sites['link']  = $this->getLinks($content);
    }

    // Obtain the meta content
    public function getMeta($content) {
        $file = 'metacache';
        file_put_contents($file, $content);   // get_meta_tags() reads a file or URL, so cache the page first
        $meta = get_meta_tags($file);
        return $meta;
    }

    // Obtain the body content (first 400 characters, tags stripped)
    public function getDetail($content) {
        preg_match('/<body.*?>(.*?)<\/body>/is', $content, $matches);
        $body = $this->stripHTML($matches[1]);
        return substr($body, 0, 400);
    }

    // Obtain the title content
    public function getTitle($content) {
        preg_match('/<title>(.+?)<\/title>/is', $content, $matches);
        return $matches[1];
    }

    // Obtain all links (href plus link text)
    public function getLinks($content) {
        $pat = '/<a(.*?)href="(.*?)"(.*?)>(.*?)<\/a>/is';
        preg_match_all($pat, $content, $matches);
        $result['href'] = $matches[2];
        $result['name'] = $this->stripTags($matches[4]);
        return $result;
    }

    // Fetch a page over a raw socket (alternative to cURL; $url must be a hostname)
    public function socketOpen($url) {
        $fp = fsockopen($url, 80, $errno, $errstr, 30);
        if ($fp === false) {
            echo "connection failed: $errstr ($errno)<br />\n";
            return false;
        } else {
            $out  = "GET / HTTP/1.1\r\n";
            $out .= "Host: $url\r\n";
            $out .= "Connection: Close\r\n\r\n";
            fwrite($fp, $out);
            $content = '';
            while (!feof($fp)) {
                $content .= fgets($fp, 1024);
            }
            fclose($fp);
            // var_dump($content); exit;   // debug output
            return $content;
        }
    }

    // Obtain the content of the specified URL via cURL
    public function getContent($url) {
        $ch = @curl_init($url);
        @curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0)");
        ob_start();                   // curl_exec() prints the response, so capture it with output buffering
        @curl_exec($ch);
        $content = ob_get_clean();
        @curl_close($ch);
        return $content;
    }

    // Strip <script> and <style> blocks
    public function stripHTML($string) {
        $pat = array(
            "/<script.*?>.*?<\/script>/is",
            "/<style.*?>.*?<\/style>/is"
        );
        $rep = array('', '');
        return preg_replace($pat, $rep, $string);
    }

    // Remove tags from array elements (recursively)
    public function stripTags(&$arr) {
        foreach ($arr as $key => $val) {
            if (is_array($val)) {
                $this->stripTags($arr[$key]);
            } else {
                $arr[$key] = strip_tags($val);
            }
        }
        return $arr;
    }

    // Dump the collected information
    public function show() {
        echo "<pre>";
        print_r($this->_sites);
        echo "</pre>";
    }
} // End class Engine

$engine = new Engine('http://www.163.com');
$engine->start();
$engine->show();
?>
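Note that getContent() above uses output buffering to capture what curl_exec() prints. An equivalent and somewhat cleaner approach, shown here only as a sketch and not part of the original snippet, is to have cURL return the response as a string via CURLOPT_RETURNTRANSFER:

// Sketch: getContent() rewritten with CURLOPT_RETURNTRANSFER (behaviour otherwise unchanged)
public function getContent($url) {
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0)");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);   // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);   // follow redirects
    $content = curl_exec($ch);
    curl_close($ch);
    return $content === false ? '' : $content;
}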
This is only the main part of the engine. The next step is to store the collected information in a database and then repeat the same process for every link that was obtained, crawling each one and storing its information as well. The core part is that, after we obtain a site's information, we need to derive keywords for the site and assign it a ranking for later searches. For now, think about how a site's keywords and ranking should be determined.
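As a rough illustration of that next step, the crawled data could be written to a database while each extracted link is pushed onto a queue to be crawled in turn. This is purely a sketch: the `sites` table, its columns, and an Engine::getSites() getter that exposes the private $_sites array are assumptions, not part of the original code.

// Sketch only: assumes a MySQL table sites(url, title, keywords) and a
// hypothetical Engine::getSites() getter returning the private $_sites array.
$pdo = new PDO('mysql:host=localhost;dbname=spider;charset=utf8', 'user', 'pass');

function storeSite(PDO $pdo, array $site) {
    $stmt = $pdo->prepare('INSERT INTO sites (url, title, keywords) VALUES (?, ?, ?)');
    $keywords = isset($site['meta']['keywords']) ? $site['meta']['keywords'] : '';
    $stmt->execute(array($site['url'], $site['title'], $keywords));
}

// Breadth-first crawl: repeat the whole process for every link found so far
$queue = array('http://www.163.com');
$seen  = array();
while ($url = array_shift($queue)) {
    if (isset($seen[$url])) {
        continue;
    }
    $seen[$url] = true;

    $engine = new Engine($url);
    $engine->start();
    $site = $engine->getSites();           // hypothetical getter, see note above
    storeSite($pdo, $site);

    foreach ($site['link']['href'] as $link) {
        if (strpos($link, 'http') === 0) { // only queue absolute URLs
            $queue[] = $link;
        }
    }
}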