import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.hothouseobjects.tags.Inspector;
import com.hothouseobjects.tags.Tag;
import com.hothouseobjects.tags.TagTiller;
import com.hothouseobjects.tags.Text;
Public class parsehtml {
Public static void main (string [] ARGs ){
Try {
File file = new file ("D: // ttt.htm ");
Int Len = (INT) file. Length ();
Byte [] B;
B = new byte [Len];
Fileinputstream FCM = new fileinputstream (File );
FS. Read (B );
FCM. Close ();
Reader read;
Read = new stringreader (new string (B ));
Tagtiroller = new tagtiroller (read );
Tagtiler. runtiler ();
Tag thepage = tagtiler. gettilledtags ();
// Search <a href = ......
List thehref = inspector. collectbytype (thepage, "");
Int I = thehref. Size ();
While (I> 0 ){
// System. Out. println (TAG) thehref. Get (I-1). tohtml ());
System. Out. println (TAG) thehref. Get (I-1). getattributevalue ("href "));
// System. Out. println (TAG) thehref. Get (I-1). getattributevalue ("target "));
// System. Out. println (TAG) thehref. Get (I-1). getitem (0). tohtml ());
I-= 1;
}
// Search List theimg = inspector. collectbytype (thepage, "IMG ");
Int J = 0;
While (j <theimg. Size ()){
System. Out. println (TAG) theimg. Get (j). tohtml ());
System. Out. println (TAG) theimg. Get (j). getattributevalue ("src "));
System. Out. println (TAG) theimg. Get (j). getattributevalue ("Alt "));
J + = 1;
}
// Out put file of HTML
Tag big = new tag ("h1 ");
Tag Red = new tag ("font ");
Tag underlined = new tag ("U ");
Red. setattribute ("color", "# ccoooo ");
Big. additem (red );
Red. additem (underlined );
Underlined. additem (new text ("sorry, no addresses found ."));
Tag Title = inspector. locatebytype (thepage, "title ");
Title. additem (BIG );
Stringwriter writer = new stringwriter ();
Thepage. tohtml (writer );
File file1 = new file ("test000.html ");
Fileoutputstream Fos = new fileoutputstream (file1 );
FOS. Write (writer. tostring (). getbytes ());
FOS. Close ();
// Search of <Table
List thetd = inspector. collectbytype (thepage, "TD ");
Int K = 0;
While (k <thetd. Size ()){
System. Out. println (TAG) thetd. Get (k). tohtml ());
K + = 1;
}
}
Catch (exception ex ){
Ex. printstacktrace ();
}
}
}