A Swing program that counts the good links and the bad (broken) links found on the web page at a given URL

Source: Internet
Author: User

Entry class

Import Java.awt.dimension;import Java.awt.insets;import Java.awt.event.actionevent;import Java.awt.event.actionlistener;import Java.io.ioexception;import Java.net.malformedurlexception;import Java.net.url;import Java.net.urlconnection;import Javax.swing.jbutton;import Javax.swing.jframe;import Javax.swing.jlabel;import Javax.swing.jmenubar;import Javax.swing.jscrollpane;import Javax.swing.JTextArea;import Javax.swing.jtextfield;import Javax.swing.scrollpaneconstants;import javax.swing.swingutilities;/** * Description Check that the URL is a valid URL, the entry class, run the class directly, paste the URL address that you want to parse into the text box * * @author Wangxu * */public class Checklinks extends JFrame implements Ru Nnable, ispiderreportable {//used by Addnotifyboolean framesizeadjusted = false; JLabel Label1 = new JLabel (); JButton begin = New JButton (); JTextField url = new JTextField (); JScrollPane errorscroll = new JScrollPane (); JTextArea errors = new JTextArea (); JLabel current = new JLabel (); JLabel Goodlinkslabel = new JLabel (); JLabel Badlinkslabel = new JLabel ();p rotected Thread backgroundthread;protected Spider spider;protected URL base;protected int badlinkscount = 0;protected I NT Goodlinkscount = 0;private static final long serialversionuid = 1l;public checklinks () {settitle ("Find broken Links"); /Set JFrame title Getcontentpane (). setlayout (null);//Set Layout mode SetSize (405, 288); setvisible (true); Label1.settext ("Enter A URL: "); Getcontentpane (). Add (Label1); Label1.setbounds (n, v, +); Begin.settext (" Begin "); Begin.setactioncommand ("Begin"); Getcontentpane (). Add (Begin), Begin.setbounds (12, 36, 84, 24);//set coordinates and width, High Getcontentpane (). 
Add (URL), Url.setbounds (108, 288, Errorscroll.setautoscrolls); (true);// Automatic display of scroll bar errorscroll.sethorizontalscrollbarpolicy (scrollpaneconstants.horizontal_scrollbar_always);// Horizontal direction always shows Errorscroll.setverticalscrollbarpolicy (scrollpaneconstants.vertical_scrollbar_always);// The vertical direction always displays Errorscroll.setopaque (true);//Set Opaque Getcontentpane (). Add (Errorscroll); Errorscroll.setbounds (12, 120, 384, 156); errors.seteditable (false);//Set non-editable errOrscroll.getviewport (). Add (Errors);//Add text fields to the scrollbar errors.setbounds (0, 0, 366, 138); Current.settext ("Currently Processing: "); Getcontentpane (). Add (current),//Add the Jlabelcurrent.setbounds (12, 72, 384, 12) to display the present information; Goodlinkslabel.settext ("Good links:0"); Getcontentpane (). Add (Goodlinkslabel); Goodlinkslabel.setbounds (12, 96, 192, Badlinkslabel.settext ("Bad links:0"); Getcontentpane (). Add (Badlinkslabel); Badlinkslabel.setbounds (216, 96, 96, 12); Symaction lsymaction = new Symaction ();//Instantiate an event listener Begin.addactionlistener (lsymaction);//register Listener}static public void Main (String args[]) {new Checklinks ();//Program Entry}public void Addnotify () {//Record the size of the window prior to calling parent ' s ADDNOTIFY.DI Mension size = GetSize (); super.addnotify (); if (framesizeadjusted) return;framesizeadjusted = true;//Adjust size of frame According to the insets and menu barinsets insets = Getinsets (); JMenuBar MenuBar = Getrootpane (). Getjmenubar (); int menubarheight = 0;if (MenuBar! = null) Menubarheight = MENUBAR.GETPREferredsize (). 
Height;setsize (insets.left + insets.right + size.width, Insets.top + insets.bottom + size.height + MenuBarH eight);} Class Symaction implements ActionListener {public void actionperformed (ActionEvent event) {Object object = Event.getsourc E (); if (object = = begin) Begin_actionperformed (event);}} void Begin_actionperformed (ActionEvent event) {if (Backgroundthread = = null) {Begin.settext ("Cancel"); Backgroundthread = new Thread (this);//Use the current object to instantiate a thread object Backgroundthread.start ();//Start thread, execute Run method goodlinkscount = 0; Badlinkscount = 0;} else {spider.cancel ();//Set flag bit True}} @Overridepublic void Run () {try {errors.settext (""); spider = new Spider (this);// Instantiate a spider object with the current object, because the current class implements the Ispiderreportable interface Spider.clear (); base = new URL (Url.gettext ());// Get the URL address Spider.addurl (base) to search for,//Add the URL address to Spiderspider.begin ();//spider start working Runnable dolater = new Runnable () { public void Run () {Begin.settext ("Begin");}};/ /causes Dorun.run () to execute asynchronously on the AWT event assignment thread. Occurs only after all pending awt//events have been processed. This method should be used when the application thread needs to update the GUI. In the following exampleExample, invokelater//calls the Runnable object on the event assignment thread Dohelloworld join the queue, and then outputs a message. 
Swingutilities.invokelater (dolater); backgroundthread = null;//re-empty the background thread to accept the next URL} catch (Malformedurlexception e) {Updateerrors err = new Updateerrors (); err.msg = "bad address."; Swingutilities.invokelater (err);}} Detects if two URL addresses belong to the same host, if True, otherwise false@overridepublic boolean spiderfoundurl (url base, url url) {updatecurrentstats cs = new Updatecurrentstats (); cs.msg = Url.tostring ();//Assign URL information to cs.msg, use background thread to print swingutilities.invokelater (CS); Checklink (URL)) {updateerrors err = new Updateerrors (); err.msg = URL + "(on page + base +") \ n "; Swingutilities.invokelater (err); Badlinkscount++;return false;} Goodlinkscount++;if (!url.gethost (). Equalsignorecase (Base.gethost ())) return False;elsereturn true; @Overridepublic void spiderurlerror (url url) {System.out.println ("url not found:" + URL);} Protected boolean checklink (url url) {try {urlconnection connection = url.openconnection (); Connection.connect (); return true;} catch (IoexceptioN e) {return false;}} public void Spiderfoundemail (String email) {System.out.println ("Get email:" + email);} Class Updateerrors implements Runnable {public String msg;public void Run () {errors.append (msg);}} Class Updatecurrentstats implements Runnable {public String msg;public void Run () {Current.settext ("currently processing : "+ msg"); Goodlinkslabel.settext ("Good Links:" + goodlinkscount); Badlinkslabel.settext ("Bad Links:" + Badlinkscount);} }}
// File: HTMLParse.java
import javax.swing.text.html.*;

/**
 * Exposes the HTML parser that ships with Swing.
 *
 * <p>The Swing {@code JEditorPane} text component supports different kinds of content through a
 * plug-in mechanism called {@code EditorKit}, and {@link HTMLEditorKit} is the kit provided for
 * HTML (version 3.2 with some extensions). Its {@code getParser()} method is {@code protected};
 * this subclass only widens it to {@code public} so that other classes can obtain the parser.
 *
 * @author Wangxu
 */
public class HTMLParse extends HTMLEditorKit {

  private static final long serialVersionUID = 1L;

  /**
   * Returns the parser used by {@link HTMLEditorKit}.
   *
   * @return the kit's HTML parser
   */
  @Override
  public HTMLEditorKit.Parser getParser() {
    return super.getParser();
  }
}
// File: ISpiderReportable.java
import java.net.*;

/**
 * Callback interface through which a spider reports what it finds while crawling.
 */
public interface ISpiderReportable {

  /**
   * Called when a link is found.
   *
   * @param base the page on which the link was found
   * @param url the link itself
   * @return {@code true} if the spider should add {@code url} to its workload
   */
  boolean spiderFoundURL(URL base, URL url);

  /**
   * Called when a page could not be read.
   *
   * @param url the page that failed
   */
  void spiderURLError(URL url);

  /**
   * Called when an e-mail link is found.
   *
   * @param email the e-mail link
   */
  void spiderFoundEMail(String email);
}
Import java.util.*;import java.net.*;import java.io.*;import javax.swing.text.*;import javax.swing.text.html.*; public class Spider {//loading wrong working set protected Collection workloaderror = new ArrayList (3);//Waiting Working set protected Collection Workloa dwaiting = new ArrayList (3);//Working Set processed protected Collection workloadprocessed = new ArrayList (3);p rotected Ispiderreportable report;protected Boolean cancel = false;public Spider (ispiderreportable report) {this.report = report;} Public Collection Getworkloaderror () {return workloaderror;} Public Collection getworkloadwaiting () {return workloadwaiting;} Public Collection getworkloadprocessed () {return workloadprocessed;} public void Clear () {Getworkloaderror (). Clear (); Getworkloadwaiting (). Clear (); getworkloadprocessed (). Clear (); public void Cancel () {Cancel = true;} public void addurl (URL url) {if (Getworkloadwaiting (). Contains (URL))///If the waiting working set already contains the URL, return return;if ( Getworkloaderror (). Contains (URL))//If the working set of errors already contains the URL, return return;if (getworkloadprocessed (). Contains (URL))//If the working set contains the URL, return Return;log ("Adding to Workload:" + URL); getworkloadwaiting (). Add (URL);//Add it to the working set to wait}// Method of the specific parsing URL public void processurl (url url) {try {log ("Processing:" + URL);//console print processing URL address//Get the URL ' s contentsurlconn ection connection = Url.openconnection (); System.out.println (Connection.getcontenttype () + "++++++++++++++++===="); if (Connection.getcontenttype ()! = null) &&!connection.getcontenttype (). toLowerCase (). StartsWith ("text/")) {getworkloadwaiting (). Remove (URL); Getworkloadprocessed (). 
Add (URL); log ("Not processing because content type is:" + connection.getcontenttype ()); return;} Read the Urlinputstream is = Connection.getinputstream ();  Reader r = new InputStreamReader (IS);//Parse the Urlhtmleditorkit.parser parse = new Htmlparse (). Getparser ();//Parse the Given stream and drive the given callback with the//results of the parse. This method should is implemented to be//thread-safe.//parses the given stream and drives the given callback by parsing the result. When the method finishes executing, the given callback function is called Parse.parse (R, New parseR (URL), true);} catch (IOException e) {//If error getworkloadwaiting (). Remove (URL);//Remove Urlgetworkloaderror () from working set. Add (URL);// Add the error URL to the wrong working set log ("Error:" + URL); report.spiderurlerror (URL);//Urlreturn to report the error;} Mark URL as Completegetworkloadwaiting (). Remove (URL); getworkloadprocessed (). Add (URL); log ("Complete:" + URL);} The spider works as long as the waiting working set is not empty, and the flag bit is false, then the Urlpublic Void Begin () {cancel = False;while () is removed from the collection. IsEmpty () &&!cancel) {Object list[] = getworkloadwaiting (). ToArray (); for (int i = 0; (I < list.length) &&!cancel; i++) Processurl ((URL) list[i]);//method to invoke parsing URL}}protected class Parser extends Htmleditorkit.parsercallback {protected URL base;public Parser (url base) {this.base = base;} public void Handlesimpletag (HTML. Tag tag, mutableattributeset mutableattributeset, int pos) {String href = (string) mutableattributeset.getattribute (HTML . ATTRIBUTE.HREF);//Gets the HREF link if ((HREF = = null) && (tag = = HTML. tag.frame)) href = (String) mutableattributEset.getattribute (HTML. ATTRIBUTE.SRC); if (href = = null) Return;int i = Href.indexof (' # '), if (i! =-1) href = href.substring (0, I);//Start intercepting the ' # ' character if ( Href.tolowercase (). 
StartsWith ("mailto:")) {//If it is a mail link report.spiderfoundemail (href); return;} if (tag = = HTML. Tag.meta) {string title = (String) mutableattributeset.getattribute (HTML. Attribute.name); System.out.println ("title:" + title);} Handle the newly obtained link handlelink (base, href);} public void Handlestarttag (HTML. Tag T, mutableattributeset A, int pos) {Handlesimpletag (T, a, POS);//Handle the same way}//handling linked functions protected void handle Link (URL base, String str) {try {URL url = new URL (base, str);//judgment, if the same host is included, join the pending working Set if (Report.spiderfoundurl (base, URL)) Addurl (URL);} catch (malformedurlexception e) {log ("Found Malformed URL:" + str);}}} public void log (String entry) {System.out.println ((new Date ()) + ":" + Entry);}}

A Swing program that counts the good links and the bad (broken) links found on the web page at a given URL

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support; 6 Free Tickets per Quarter; Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.