import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.hothouseobjects.tags.Inspector;
import com.hothouseobjects.tags.Tag;
import com.hothouseobjects.tags.TagTiller;
import com.hothouseobjects.tags.Text;
public class Parsehtml {
public static void Main (string[] args) {
try {
file = new file ("d://ttt.htm");
int len = (int) file.length ();
byte[] b;
B = new Byte[len];
FileInputStream fis = new FileInputStream ( file);
Fis.read (b);
fis.close ();
Reader Read;
read = new StringReader (new String (b));
Tagtiller Tagtiller = new Tagtiller (read);
Tagtiller.runtiller ();
Tag thepage = Tagtiller.gettilledtags ();
Search <a href= ...
List thehref = Inspector.collectbytype (thepage, "a");
int i = Thehref.size ();
while (i>0) {
System.out.println (((TAG) Thehref.get (i-1)). ToHTML ());
System.out.println (((TAG) Thehref.get (i-1)). Getattributevalue ("href");
System.out.println (((TAG) Thehref.get (i-1)). Getattributevalue ("target");
System.out.println (((TAG) Thehref.get (i-1)). GetItem (0). ToHTML ());
I-=1;
}
Search List theimg = Inspector.collectbytype (thepage, "img");
int j = 0;
while (J < Theimg.size ()) {
System.out.println (((Tag) Theimg.get (j)). ToHTML ());
System.out.println (((Tag) Theimg.get (j)). Getattributevalue ("src"));
System.out.println (((Tag) Theimg.get (j)). Getattributevalue ("alt"));
J +=1;
}
Out put file of HTML
Tag big = new Tag ("H1");
Tag red = new Tag ("FONT");
Tag underlined = new Tag ("U");
Red.setattribute ("Color", "#CCOOOO");
Big.additem (red);
Red.additem (underlined);
Underlined.additem (New Text ("Sorry, no addresses found."));
Tag title = Inspector.locatebytype (thepage, "title");
Title.additem (big);
StringWriter writer = new StringWriter ();
Thepage.tohtml (writer);
File File1 = new file ("test000.html");
FileOutputStream fos = new FileOutputStream (file1);
Fos.write (Writer.tostring (). GetBytes ());
Fos.close ();
Search of <table
List thetd = Inspector.collectbytype (thepage, "TD");
int k = 0;
while (K < Thetd.size ()) {
System.out.println (((Tag) Thetd.get (k)). ToHTML ());
K +=1;
}
}
catch (Exception ex) {
Ex.printstacktrace ();
}
}
}