Use jsoup to process data streams that contain HTML markup.
package com.cn;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
Public class CV {
/**
* @ Param ARGs
*/
/**
* @ Param ARGs
*/
Public static void main (string [] ARGs ){
// Todo auto-generated method stub
Elements TDS = NULL;
String html = "gfg <br> <Table> <tr> <TD> 1 </TD> <TD> yecao </TD> </tr> <TD> 2 </TD> <TD> YC </TD> </tr> </table> "; // test content
List <Map <string, Object> List = new arraylist <Map <string, Object> ();
// Start using jsoup
// Jsoup supports converting a document class to a document class.
Document document = jsoup. parse (HTML );
// A document consists of elements. We select the tag starting with "TR" and store it in the TRS element group.
Elements TRS = Document. Select ("TR ");
// Obtain the number of tags that contain TR in the entire HTML.
Int totaltrs = TRS. Size ();
// We can observe the HTML without search results. If totaltrs is <= 3, no result is returned.
// As long as there is a bibliography result, totaltrs must be greater than 3, so
If (totaltrs> 0)
For (INT I = 0; I <totaltrs; I ++)
{
// Observe the HTML. Starting from the I + 2 TR, what we need is the bibliography information.
// We select the TD Tag Element Group from each tr.
TDS = TRS. Get (I). Select ("TD ");
// Obtain the number of TD in each tr
Int totaltds = TDS. Size ();
// A temporary hashmap containing string-object key-value pairs
Map <string, Object> map = new hashmap <string, Object> ();
// J is the number of identifiers.
For (Int J = 0; j <totaltds; j ++)
{
Switch (j ){
Case 0:
Map. Put ("001", tds.get(j).html (). tostring ());
Break;
Case 1:
Map. Put ("002", tds.get(j).html (). tostring ());
Break;
Default:
Break;
}
}
List. Add (MAP );
}
System. Out. println (document );
System. Out. println (TRS );
System. Out. println (TRS. Size ());
System. Out. println (TDS );
System. Out. println (TDS. Size ());
System. Out. println (list );
}
}
The console prints the following output: