Entry-point class (run this class directly):
Import Java.awt.dimension;import Java.awt.insets;import Java.awt.event.actionevent;import Java.awt.event.actionlistener;import Java.io.ioexception;import Java.net.malformedurlexception;import Java.net.url;import Java.net.urlconnection;import Javax.swing.jbutton;import Javax.swing.jframe;import Javax.swing.jlabel;import Javax.swing.jmenubar;import Javax.swing.jscrollpane;import Javax.swing.JTextArea;import Javax.swing.jtextfield;import Javax.swing.scrollpaneconstants;import javax.swing.swingutilities;/** * Description Check that the URL is a valid URL, the entry class, run the class directly, paste the URL address that you want to parse into the text box * * @author Wangxu * */public class Checklinks extends JFrame implements Ru Nnable, ispiderreportable {//used by Addnotifyboolean framesizeadjusted = false; JLabel Label1 = new JLabel (); JButton begin = New JButton (); JTextField url = new JTextField (); JScrollPane errorscroll = new JScrollPane (); JTextArea errors = new JTextArea (); JLabel current = new JLabel (); JLabel Goodlinkslabel = new JLabel (); JLabel Badlinkslabel = new JLabel ();p rotected Thread backgroundthread;protected Spider spider;protected URL base;protected int badlinkscount = 0;protected I NT Goodlinkscount = 0;private static final long serialversionuid = 1l;public checklinks () {settitle ("Find broken Links"); /Set JFrame title Getcontentpane (). setlayout (null);//Set Layout mode SetSize (405, 288); setvisible (true); Label1.settext ("Enter A URL: "); Getcontentpane (). Add (Label1); Label1.setbounds (n, v, +); Begin.settext (" Begin "); Begin.setactioncommand ("Begin"); Getcontentpane (). Add (Begin), Begin.setbounds (12, 36, 84, 24);//set coordinates and width, High Getcontentpane (). 
Add (URL), Url.setbounds (108, 288, Errorscroll.setautoscrolls); (true);// Automatic display of scroll bar errorscroll.sethorizontalscrollbarpolicy (scrollpaneconstants.horizontal_scrollbar_always);// Horizontal direction always shows Errorscroll.setverticalscrollbarpolicy (scrollpaneconstants.vertical_scrollbar_always);// The vertical direction always displays Errorscroll.setopaque (true);//Set Opaque Getcontentpane (). Add (Errorscroll); Errorscroll.setbounds (12, 120, 384, 156); errors.seteditable (false);//Set non-editable errOrscroll.getviewport (). Add (Errors);//Add text fields to the scrollbar errors.setbounds (0, 0, 366, 138); Current.settext ("Currently Processing: "); Getcontentpane (). Add (current),//Add the Jlabelcurrent.setbounds (12, 72, 384, 12) to display the present information; Goodlinkslabel.settext ("Good links:0"); Getcontentpane (). Add (Goodlinkslabel); Goodlinkslabel.setbounds (12, 96, 192, Badlinkslabel.settext ("Bad links:0"); Getcontentpane (). Add (Badlinkslabel); Badlinkslabel.setbounds (216, 96, 96, 12); Symaction lsymaction = new Symaction ();//Instantiate an event listener Begin.addactionlistener (lsymaction);//register Listener}static public void Main (String args[]) {new Checklinks ();//Program Entry}public void Addnotify () {//Record the size of the window prior to calling parent ' s ADDNOTIFY.DI Mension size = GetSize (); super.addnotify (); if (framesizeadjusted) return;framesizeadjusted = true;//Adjust size of frame According to the insets and menu barinsets insets = Getinsets (); JMenuBar MenuBar = Getrootpane (). Getjmenubar (); int menubarheight = 0;if (MenuBar! = null) Menubarheight = MENUBAR.GETPREferredsize (). 
Height;setsize (insets.left + insets.right + size.width, Insets.top + insets.bottom + size.height + MenuBarH eight);} Class Symaction implements ActionListener {public void actionperformed (ActionEvent event) {Object object = Event.getsourc E (); if (object = = begin) Begin_actionperformed (event);}} void Begin_actionperformed (ActionEvent event) {if (Backgroundthread = = null) {Begin.settext ("Cancel"); Backgroundthread = new Thread (this);//Use the current object to instantiate a thread object Backgroundthread.start ();//Start thread, execute Run method goodlinkscount = 0; Badlinkscount = 0;} else {spider.cancel ();//Set flag bit True}} @Overridepublic void Run () {try {errors.settext (""); spider = new Spider (this);// Instantiate a spider object with the current object, because the current class implements the Ispiderreportable interface Spider.clear (); base = new URL (Url.gettext ());// Get the URL address Spider.addurl (base) to search for,//Add the URL address to Spiderspider.begin ();//spider start working Runnable dolater = new Runnable () { public void Run () {Begin.settext ("Begin");}};/ /causes Dorun.run () to execute asynchronously on the AWT event assignment thread. Occurs only after all pending awt//events have been processed. This method should be used when the application thread needs to update the GUI. In the following exampleExample, invokelater//calls the Runnable object on the event assignment thread Dohelloworld join the queue, and then outputs a message. 
Swingutilities.invokelater (dolater); backgroundthread = null;//re-empty the background thread to accept the next URL} catch (Malformedurlexception e) {Updateerrors err = new Updateerrors (); err.msg = "bad address."; Swingutilities.invokelater (err);}} Detects if two URL addresses belong to the same host, if True, otherwise false@overridepublic boolean spiderfoundurl (url base, url url) {updatecurrentstats cs = new Updatecurrentstats (); cs.msg = Url.tostring ();//Assign URL information to cs.msg, use background thread to print swingutilities.invokelater (CS); Checklink (URL)) {updateerrors err = new Updateerrors (); err.msg = URL + "(on page + base +") \ n "; Swingutilities.invokelater (err); Badlinkscount++;return false;} Goodlinkscount++;if (!url.gethost (). Equalsignorecase (Base.gethost ())) return False;elsereturn true; @Overridepublic void spiderurlerror (url url) {System.out.println ("url not found:" + URL);} Protected boolean checklink (url url) {try {urlconnection connection = url.openconnection (); Connection.connect (); return true;} catch (IoexceptioN e) {return false;}} public void Spiderfoundemail (String email) {System.out.println ("Get email:" + email);} Class Updateerrors implements Runnable {public String msg;public void Run () {errors.append (msg);}} Class Updatecurrentstats implements Runnable {public String msg;public void Run () {Current.settext ("currently processing : "+ msg"); Goodlinkslabel.settext ("Good Links:" + goodlinkscount); Badlinkslabel.settext ("Bad Links:" + Badlinkscount);} }}
import javax.swing.text.html.*;

/**
 * Gives code outside {@code javax.swing.text.html} access to the HTML parser
 * that ships with Swing.
 *
 * <p>{@link HTMLEditorKit#getParser()} is declared {@code protected}; this
 * subclass only widens it to {@code public} and adds no behavior of its own.
 *
 * @author Wangxu
 */
public class HtmlParse extends HTMLEditorKit {

    private static final long serialVersionUID = 1L;

    /**
     * Returns the parser supplied by {@link HTMLEditorKit}.
     *
     * @return the result of {@code HTMLEditorKit.getParser()}
     */
    @Override
    public HTMLEditorKit.Parser getParser() {
        return super.getParser();
    }
}
import java.net.*;

/**
 * Callbacks through which a spider reports what it finds while crawling.
 */
public interface ISpiderReportable {

    /**
     * Called when a link is found on a page.
     *
     * @param base the page on which the link was found
     * @param url the link target
     * @return {@code true} if the spider should add {@code url} to its
     *     workload, {@code false} to ignore it
     */
    boolean spiderFoundURL(URL base, URL url);

    /**
     * Called when a page could not be read.
     *
     * @param url the page that failed
     */
    void spiderURLError(URL url);

    /**
     * Called when an e-mail link is found.
     *
     * @param email the link text as found by the spider
     */
    void spiderFoundEMail(String email);
}
Import java.util.*;import java.net.*;import java.io.*;import javax.swing.text.*;import javax.swing.text.html.*; public class Spider {//loading wrong working set protected Collection workloaderror = new ArrayList (3);//Waiting Working set protected Collection Workloa dwaiting = new ArrayList (3);//Working Set processed protected Collection workloadprocessed = new ArrayList (3);p rotected Ispiderreportable report;protected Boolean cancel = false;public Spider (ispiderreportable report) {this.report = report;} Public Collection Getworkloaderror () {return workloaderror;} Public Collection getworkloadwaiting () {return workloadwaiting;} Public Collection getworkloadprocessed () {return workloadprocessed;} public void Clear () {Getworkloaderror (). Clear (); Getworkloadwaiting (). Clear (); getworkloadprocessed (). Clear (); public void Cancel () {Cancel = true;} public void addurl (URL url) {if (Getworkloadwaiting (). Contains (URL))///If the waiting working set already contains the URL, return return;if ( Getworkloaderror (). Contains (URL))//If the working set of errors already contains the URL, return return;if (getworkloadprocessed (). Contains (URL))//If the working set contains the URL, return Return;log ("Adding to Workload:" + URL); getworkloadwaiting (). Add (URL);//Add it to the working set to wait}// Method of the specific parsing URL public void processurl (url url) {try {log ("Processing:" + URL);//console print processing URL address//Get the URL ' s contentsurlconn ection connection = Url.openconnection (); System.out.println (Connection.getcontenttype () + "++++++++++++++++===="); if (Connection.getcontenttype ()! = null) &&!connection.getcontenttype (). toLowerCase (). StartsWith ("text/")) {getworkloadwaiting (). Remove (URL); Getworkloadprocessed (). 
Add (URL); log ("Not processing because content type is:" + connection.getcontenttype ()); return;} Read the Urlinputstream is = Connection.getinputstream (); Reader r = new InputStreamReader (IS);//Parse the Urlhtmleditorkit.parser parse = new Htmlparse (). Getparser ();//Parse the Given stream and drive the given callback with the//results of the parse. This method should is implemented to be//thread-safe.//parses the given stream and drives the given callback by parsing the result. When the method finishes executing, the given callback function is called Parse.parse (R, New parseR (URL), true);} catch (IOException e) {//If error getworkloadwaiting (). Remove (URL);//Remove Urlgetworkloaderror () from working set. Add (URL);// Add the error URL to the wrong working set log ("Error:" + URL); report.spiderurlerror (URL);//Urlreturn to report the error;} Mark URL as Completegetworkloadwaiting (). Remove (URL); getworkloadprocessed (). Add (URL); log ("Complete:" + URL);} The spider works as long as the waiting working set is not empty, and the flag bit is false, then the Urlpublic Void Begin () {cancel = False;while () is removed from the collection. IsEmpty () &&!cancel) {Object list[] = getworkloadwaiting (). ToArray (); for (int i = 0; (I < list.length) &&!cancel; i++) Processurl ((URL) list[i]);//method to invoke parsing URL}}protected class Parser extends Htmleditorkit.parsercallback {protected URL base;public Parser (url base) {this.base = base;} public void Handlesimpletag (HTML. Tag tag, mutableattributeset mutableattributeset, int pos) {String href = (string) mutableattributeset.getattribute (HTML . ATTRIBUTE.HREF);//Gets the HREF link if ((HREF = = null) && (tag = = HTML. tag.frame)) href = (String) mutableattributEset.getattribute (HTML. ATTRIBUTE.SRC); if (href = = null) Return;int i = Href.indexof (' # '), if (i! =-1) href = href.substring (0, I);//Start intercepting the ' # ' character if ( Href.tolowercase (). 
StartsWith ("mailto:")) {//If it is a mail link report.spiderfoundemail (href); return;} if (tag = = HTML. Tag.meta) {string title = (String) mutableattributeset.getattribute (HTML. Attribute.name); System.out.println ("title:" + title);} Handle the newly obtained link handlelink (base, href);} public void Handlestarttag (HTML. Tag T, mutableattributeset A, int pos) {Handlesimpletag (T, a, POS);//Handle the same way}//handling linked functions protected void handle Link (URL base, String str) {try {URL url = new URL (base, str);//judgment, if the same host is included, join the pending working Set if (Report.spiderfoundurl (base, URL)) Addurl (URL);} catch (malformedurlexception e) {log ("Found Malformed URL:" + str);}}} public void log (String entry) {System.out.println ((new Date ()) + ":" + Entry);}}
A Swing program that counts the good links and the broken links found on the page at a given URL.