Larbin Search Engine Source Code Walkthrough (II): the search engine's global variable class

Source: Internet
Author: User
Tags: strcmp, time interval
 larbin// sebastien ailleret// 29-11-99 -> 08-03-00 #include  < unistd.h> #include  <sys/socket.h> #include  <netinet/in.h> #include  <errno.h> # include <fcntl.h> #include  <iostream.h> #include  <string.h> #include  < adns.h> #include  <netdb.h> #include  <sys/socket.h> #include   "Types.h" #include   "Global.h" #include   "xutils/text.h" #include   "xutils/fifo.h" #include   "xutils/site.h" #include   "Xutils/debug.h" #include   "xutils/maxedsizedfifo.h" #include   "xutils/persistentfifo.h" #include   "Xutils/constantsizedfifo.h" #include   "Xutils/constantsizedfifopriority.h"//////////////////////////  struct global///////////////////////////////////////////////////  define all the static variables//static variable definition process, which includes static member variables in the class. Hashtable&nbsP;*global::seen; The type of the variable is the template class genericfifo<url> *global::urlsinput; genericfifo<url> *global::urlsinternal; site *global::sitelist; genericfifo<site> *global::oksites; Genericfifo<site> *global::d nssites; connexion **global::connexions; adns_state global::ads; constantsizedfifopriority<connexion> *global::freeconns; constantsizedfifo<connexion> *global::userconns; interval *global::inter; Uint global::d epthinsite; time_t global::waitduration; char *global::useragent; char *global::sender; char *global::headers; Sockaddr_in *global::p roxyaddr; bool global::isspecific; char *global::contenttype; Char *global::p rivilegedext; Vector<char> *global::d omains; vector<char> global::forbext; char *global::firsturl; uint global::nb_conn; Uint global::d nsconn; unsigned short int global::httpport; Unsigned short int global::inputport; /** constructor : initialize allmost everything  * Everything is  read from the config file  (larbin.conf by default)  */Global::global   (int argc, char *argv[])   {  char *configFile =  "larbin.conf ";   bool reload = false;  //whether it's a reboot or a first start      //  Verification of arguments   int 
pos = 1;   while  (POS&NBSP;&LT;&NBSP;ARGC)      {    //argc  is greater than 1, indicating that there are parameters         if  (!strcmp (argv[pos],  "-C")  & & argc > pos+1)           {         configfile = argv[pos+1];  //#通过参数设置配置文件的名字          pos += 2;         }          else  if  (!strcmp (argv[pos],  "-reload"))   //reboot, starting at the end of last crawl          {        reload = true; //# Set reload, the specific role is not clear.         pos++;         }          else           {        break; &NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP}  &nbsp}      //program parameters have errors, prompt to use method    if  (POS&NBSP;!=&NBSP;ARGC)     { //#显示使用方法.         cerr <<  "usage : "  <<  argv[0];         cerr <<  " [-c configfile] [-reload]/n";         exit (1); &NBSP;&NBSP//#初始话设置   // standard values   waitDuration = 60;   //access to the same server, the time interval, can not be less than 30s   depthinsite = 5;      // Maximum depth of access to the Web page   userAgent =  "Larbin";  //robot name   sender =  " larbin@somewhere.com ";   //sender information to construct HTTP packets.     
firstUrl =  "http://localhost/"    //the URL first accessed   nb_conn  = 20;  //maximum Parallel number connection number   dnsConn = 3;               //dns maximum number of parallel connections   httpPort = 8081;       //  interface to see the statistics of Larbin crawls through the web   inputPort = 1976;    //Telnet interface that adds input information such as URLs to Larbin   proxyaddr = null;   //Proxy server Address   isspecific = false;  // is a specific search   domains = null;   //domain name limit   // Read the  Configuration file      crash ("Read the configuration file");   //has a macro definition in Debug.h file #define crash (s)   (cerr << s <<  "/n")    parsefile (configfile);  //#解析配置文件      // initialize everything    crash ("Create global values");   // headers   String strtmp;   strtmp.addstring ("/r/nuser-agent: ");   strtmp.addstring (useragent);   strtmp.addstring (" ");   strtmp.addstring (sender);   strtmp.addstring ("/r/naccept: text/html/r/n/r/n");   headers = strtmp.givestring ();   //define static char *headers; in header file   // fifos      //The constructor here requires further attention   URLsInternal = new  Persistentfifo<url> (Fifofile, reload, this);   URLsInput = new Fifo<url>;   inter = new interval (Ramurls);   siteList = new Site[siteListSize];   okSites = new Fifo<Site>;   dnsSites = new Fifo<Site>;      seen = new hashtable (!reload);    //Initialize hash table, static hashtable *seen; defined in header file   userConns = new ConstantSizedFifo<Connexion> (Nb_conn);     //defines queues for connections that are already in use   freeConns = new ConstantSizedFifoPriority<Connexion> (nb_ conn);     //defines a queue where free does not use a connection   connexions = new connexion *[nb_ conn]; //defines the structure body array pointers for saving connection information      for  (uint i=0; i<nb_conn; i++)  //cycles are the maximum number of connections   {         connexions[i] = new connexion;  //Create a connection 
information structure          freeconns->put (Connexions[i]);   //the newly created connection information structure to the free connection queue       // init non blocking dns calls   crash ("Start adns");   Initializes the DNS call   adns_initflags flags = adns_initflags  (adns_if_nosigpipe |  adns_if_noerrprint);     // adns_initflags  (Adns_if_nosigpipe);   adns_init (&ads, flags, null); }/** destructor : never used because the program should never  end !  */global::~global  ()   {  cerr <<  why he hell do  you want to delete global !/n ";////////////////////////////////////////////////////////////////////function function: Parse config file//parameter:        char *file   the name//return value of the profile:   void//NOTE: Call//////////////////in constructor global /** parse configuration file */void  Global::p arsefile  (char *file)   {  int fds = open (file, o_rdonly);   if  (fds < 0)     {         cerr <<  "cannot open config file/n";         exit (1); &NBSP;&NBSP}   char *tmp = readfile (FDS);  //define text.h in,  file The BUF   close (FDS) that is equal to the file length is requested in the function;   //closes the file      // suppress  Commentary   bool eff = false;      //Remove the comment line marked with "#" in the configuration file, the number of lines that begin with "#"It all changed to spaces   for  (int i=0; tmp[i] != 0; i++)     {         switch  (Tmp[i])            {            case  ' n ':                   eff =  false;                   Break             case  ' # ':                   eff = true; //  no break !!!             
default:                   if  (EFF)  tmp[i] =  '   '; &NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP}   }      String content; The   //string class String.h file has a defined   content.addstring (TMP);      delete [] tmp;      uint pos = 0;   char *tok = nexttoken (Content, &pos);    // Defined in the Text.h file, function function: Extracts the next word from a string, Nexttoken                                                                                     // Will reapply for a space to save the extracted words, so after use to pay attention to release the space      while (tok != null)     {             if  (!strcasecmp (tok,  "useragent"))                {                     //Read the user agent information in the configuration file                  delete [] tok;                 useragent  = nexttoken (Content, &pos);             }               else if  (!strcasecmp (tok,  "from"))                {                      //Read the spider owner's e-mail message in the configuration file                  delete [] tok;                 sender =  nexttoken (Content, &pos);             }               else if  (!strcasecmp (tok,  "StartURL"))                {                     //read the first crawled URL information in the configuration file                  delete []  Tok                 firstUrl = nexttoken (Content, &pos);             }               else if  (!) 
( tok,  "Waitduration")               {                      //read in the configuration file, access the same Web server interval                  delete [] tok;                 tok =  nexttoken (Content, &pos);                 waitduration  = atoi (tok);  //converts a string to an integer number                  delete&nbsp [] tok;             }               else if  (!strcasecmp (tok,  "proxy"))                {                     //read the proxy information in the configuration file                  delete [] tok;                 // host  name and dns call                  tok = nexttoken (content, &pos);   //Proxy Server host name    Content is a string read from the configuration file                  struct hostent* hp;                 proxyaddr  = new sockaddr_in;                 bzero ((Char  *) proxyaddr, sizeof  (struct sockaddr_in))

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months' usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.