using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Tool;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;

namespace Search
{
    /// <summary>
    /// Main form of a small crawler. Button 1 fetches one page, lists its links and
    /// downloads every image referenced by an &lt;img src=...&gt; tag; button 2 starts a
    /// crawl on a worker thread and reports progress through the rich text box and label.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the member casing of the external helpers (<c>zzHttp</c>,
    /// <c>Utility</c>, <c>Parser</c>) was reconstructed from a case-mangled source as
    /// PascalCase; confirm against the Tool library before merging.
    /// </remarks>
    public partial class Form1 : Form
    {
        /// <summary>Crawling stops once more than this many URLs have been visited.</summary>
        private const int MaxVisitedUrls = 1000;

        /// <summary>
        /// Matches an &lt;img&gt; tag and captures the value of its src attribute in the
        /// named group "imgUrl". Built once instead of on every click.
        /// </summary>
        private static readonly Regex ImageTagRegex = new Regex(
            @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
            RegexOptions.IgnoreCase);

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// First-in, first-out queue of URL strings backed by a linked list.
        /// </summary>
        public class Queue
        {
            // Linked list used as the queue storage: append at the tail, remove at the head.
            private readonly LinkedList<string> queue = new LinkedList<string>();

            /// <summary>Appends <paramref name="t"/> to the tail of the queue.</summary>
            public void enQueue(string t)
            {
                queue.AddLast(t);
            }

            /// <summary>
            /// Removes and returns the element at the head of the queue.
            /// </summary>
            /// <returns>The oldest queued string, or <c>null</c> when the queue is empty.</returns>
            public string deQueue()
            {
                // Previously this read queue.Last, which made the "queue" a stack and
                // threw NullReferenceException on an empty list.
                if (queue.Count == 0)
                {
                    return null;
                }

                string head = queue.First.Value;
                queue.RemoveFirst();
                return head;
            }

            /// <summary>Returns <c>true</c> when the queue holds no elements.</summary>
            public bool isQueueEmpty()
            {
                return queue.Count == 0;
            }

            /// <summary>Returns <c>true</c> when the queue contains <paramref name="t"/>.</summary>
            public bool contians(string t)
            {
                return queue.Contains(t);
            }

            /// <summary>Returns the number of queued elements.</summary>
            public int getcount()
            {
                return queue.Count;
            }
        }

        /// <summary>
        /// Static bookkeeping for the crawl: the set of visited URLs and the queue of
        /// URLs still to be visited.
        /// </summary>
        public class LinkQueue
        {
            // URLs that have already been visited.
            private static readonly ISet<string> visitedUrl = new HashSet<string>();

            // URLs waiting to be visited.
            private static readonly Queue unVisitedUrl = new Queue();

            /// <summary>Returns the queue of URLs waiting to be visited.</summary>
            public static Queue getUnVisitedUrl()
            {
                return unVisitedUrl;
            }

            /// <summary>Records <paramref name="url"/> as visited.</summary>
            public static void addVisitedUrl(String url)
            {
                visitedUrl.Add(url);
            }

            /// <summary>Removes <paramref name="url"/> from the visited set.</summary>
            public static void removeVisitedUrl(String url)
            {
                visitedUrl.Remove(url);
            }

            /// <summary>
            /// Removes and returns the next URL to visit (a string boxed as object),
            /// or <c>null</c> when nothing is pending.
            /// </summary>
            public static Object unVisitedUrlDeQueue()
            {
                return unVisitedUrl.deQueue();
            }

            /// <summary>
            /// Queues <paramref name="url"/> for visiting unless it is null or blank, was
            /// already visited, or is already pending, so each URL is visited only once.
            /// </summary>
            public static void addUnvisitedUrl(String url)
            {
                if (string.IsNullOrWhiteSpace(url))
                {
                    return;
                }

                if (visitedUrl.Contains(url) || unVisitedUrl.contians(url))
                {
                    return;
                }

                unVisitedUrl.enQueue(url);
            }

            /// <summary>Returns the number of URLs visited so far.</summary>
            public static int getVisitedUrlNum()
            {
                return visitedUrl.Count;
            }

            /// <summary>Returns <c>true</c> when no URL is waiting to be visited.</summary>
            public static bool unVisitedUrlsEmpty()
            {
                return unVisitedUrl.isQueueEmpty();
            }
        }

        string[] urlarr = new string[100];

        /// <summary>
        /// Fetches the page named in textBox1 (or a default page when the box is empty),
        /// lists its links, then downloads every image found by <see cref="ImageTagRegex"/>
        /// and shows the number of images processed in label1.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the handler name was garbled in the source ("button#click");
        /// "button1_Click" is assumed by analogy with button2_Click - confirm against the
        /// designer file.
        /// </remarks>
        private void button1_Click(object sender, EventArgs e)
        {
            zzHttp http = new zzHttp();
            CookieContainer cookie = new CookieContainer();
            string url = textBox1.Text != "" ? textBox1.Text : "http://image.baidu.com/";
            string content = http.SendDataByGET(url, "", ref cookie);

            string baseUri = Utility.GetBaseUri(url);
            string[] links = Parser.ExtractLinks(baseUri, content);
            foreach (string link in links)
            {
                // AppendText avoids re-assigning the whole Text on every iteration.
                richTextBox1.AppendText(link + "\n");
            }

            // Collect the src value of every matched <img> tag.
            MatchCollection matches = ImageTagRegex.Matches(content);
            Queue que = new Queue();
            foreach (Match match in matches)
            {
                que.enQueue(match.Groups["imgUrl"].Value);
            }

            // Drain the queue completely. The former "for (k = 0; k < que.getcount(); k++)"
            // compared a growing index with a shrinking count and stopped half way.
            int k = 0;
            while (!que.isQueueEmpty())
            {
                string picurl = que.deQueue();
                richTextBox1.AppendText(picurl + "\n");

                // The file name is whatever follows the last '/' of the image URL.
                string[] s = picurl.Split('/');
                string picname = s[s.Length - 1];
                zzHttp.DownFile(picurl, picname, @"d:\pic\");
                k++;
            }

            label1.Text = k.ToString();
        }

        /// <summary>
        /// Crawl loop run on a worker thread: starting from a fixed seed URL, repeatedly
        /// takes the next pending URL, downloads it, marks it visited and queues the links
        /// extracted from it, until nothing is pending or more than
        /// <see cref="MaxVisitedUrls"/> URLs have been visited.
        /// </summary>
        void search()
        {
            LinkQueue.addUnvisitedUrl("http://blog.csdn.net/zhujunxxxxx/");

            while (!LinkQueue.unVisitedUrlsEmpty() && LinkQueue.getVisitedUrlNum() <= MaxVisitedUrls)
            {
                // Take the URL at the head of the pending queue.
                string visitUrl = (string)LinkQueue.unVisitedUrlDeQueue();
                if (visitUrl == null)
                {
                    continue;
                }

                zzHttp downLoader = new zzHttp();
                CookieContainer cookie = new CookieContainer();

                // Download the page.
                string content = downLoader.SendDataByGET(visitUrl, "", ref cookie);

                // Record the URL as visited.
                LinkQueue.addVisitedUrl(visitUrl);

                // Extract the links from the downloaded page.
                string baseUri = Utility.GetBaseUri(visitUrl);
                string[] links = Parser.ExtractLinks(baseUri, content);

                Add2Message("accessed quantity:" + LinkQueue.getVisitedUrlNum() + ", count =" + LinkQueue.getUnVisitedUrl().getcount());

                // Queue every new link, skipping anything whose URL mentions a static
                // resource type.
                foreach (string link in links)
                {
                    if (link.Contains("css") || link.Contains("js") || link.Contains("gif")
                        || link.Contains("jpg") || link.Contains("png") || link.Contains("jpeg"))
                    {
                        continue;
                    }

                    LinkQueue.addUnvisitedUrl(link);
                    AddMessage(link);
                }
            }
        }

        /// <summary>Starts <see cref="search"/> on a new thread.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            new Thread(search).Start();
        }

        private delegate void InfoDelegate(string message);

        /// <summary>
        /// Appends <paramref name="message"/> plus a line break to richTextBox1 and scrolls
        /// to the caret. When called from a thread other than the control's own, the call
        /// is re-dispatched through <c>Invoke</c>.
        /// </summary>
        public void AddMessage(string message)
        {
            if (richTextBox1.InvokeRequired)
            {
                // Not on the control's thread: marshal the call through a delegate.
                InfoDelegate d = new InfoDelegate(AddMessage);
                richTextBox1.Invoke(d, new object[] { message });
            }
            else
            {
                richTextBox1.AppendText(message + Environment.NewLine);
                richTextBox1.ScrollToCaret();
            }
        }

        private delegate void Info2Delegate(string message);

        /// <summary>
        /// Sets label2's text to <paramref name="message"/>. When called from a thread
        /// other than the control's own, the call is re-dispatched through <c>Invoke</c>.
        /// </summary>
        public void Add2Message(string message)
        {
            if (label2.InvokeRequired)
            {
                // The label cannot be accessed directly from this thread: use a delegate.
                Info2Delegate d = new Info2Delegate(Add2Message);
                label2.Invoke(d, new object[] { message });
            }
            else
            {
                label2.Text = message;
            }
        }
    }
}