The Boost.Tokenizer library provides four predefined tokenizer function objects, of which char_delimiters_separator is deprecated. The other three are as follows:
1. char_separator
char_separator has two constructors:
char_separator();
Uses std::isspace() to identify the dropped delimiters and std::ispunct() to identify the kept delimiters. Empty tokens are dropped. (See example 2.)
char_separator(
    // Delimiters that are dropped (they do not appear in the output)
    const Char* dropped_delims,
    // Delimiters that are kept (they are output as tokens); none by default
    const Char* kept_delims = 0,
    // Empty tokens are dropped by default; pass keep_empty_tokens to keep them
    empty_token_policy empty_tokens = drop_empty_tokens);
This constructor creates a char_separator object that can be used to create a token_iterator or a tokenizer to split a sequence into tokens. Both dropped_delims and kept_delims are strings in which every character is used as a delimiter. When a delimiter is encountered in the input sequence, the current token is finished and a new token is started. Delimiters in dropped_delims do not appear in the output, whereas delimiters in kept_delims are output as tokens. If empty_tokens is drop_empty_tokens, empty tokens do not appear in the output; if empty_tokens is keep_empty_tokens, empty tokens do appear in the output. (See example 3.)
2. escaped_list_separator
escaped_list_separator has two constructors. By default the following three characters are special: '\\' (escape character), ',' (field separator), and '\"' (quote character).
explicit escaped_list_separator(Char e = '\\', Char c = ',', Char q = '\"');
escaped_list_separator(string_type e, string_type c, string_type q);
3. offset_separator
offset_separator has one useful constructor:
template<typename Iter>
offset_separator(Iter begin, Iter end, bool bwrapoffsets = true, bool breturnpartiallast = true);
1 voidTest_string_tokenizer ()2 {3 using namespaceboost;4 5 //1. Create a word breaker with default template parameters, by default all spaces and punctuation are used as separators.6 {7STD::stringStr"Link Raise the Master-sword.");8 9Tokenizer<>Tok (str);Ten for(Boost_auto (POS, Tok.begin ()); pos! = Tok.end (); + +POS) OneStd::cout <<"["<< *pos <<"]"; AStd::cout <<Std::endl; - //[Link][raise][the][master][sword] - } the - //2. Char_separator () - { -STD::stringStr"Link Raise the Master-sword."); + - //a Char_separator object, the default constructor (preserves punctuation but sees it as a delimiter) +char_separator<Char>Sep; Atokenizer<char_separator<Char> >Tok (str, SEP); at for(Boost_auto (POS, Tok.begin ()); pos! = Tok.end (); + +POS) -Std::cout <<"["<< *pos <<"]"; -Std::cout <<Std::endl; - //[link][raise][the][master][-][sword][.] - } - in //3. Char_separator (const char* dropped_delims, - //const char* kept_delims = 0, to //empty_token_policy empty_tokens = drop_empty_tokens) + { -STD::stringstr =";!!; hello|world| | -foo--bar;yow;baz|"; the *char_separator<Char> Sep1 ("-;|"); $tokenizer<char_separator<Char> >Tok1 (str, SEP1);Panax Notoginseng for(Boost_auto (POS, Tok1.begin ()); pos! = Tok1.end (); + +POS) -Std::cout <<"["<< *pos <<"]"; theStd::cout <<Std::endl; + // [!!] [Hello] [World] [Foo] [Bar] [Yow] [Baz] A thechar_separator<Char> Sep2 ("-;","|", keep_empty_tokens); +tokenizer<char_separator<Char> >tok2 (str, SEP2); - for(Boost_auto (POS, Tok2.begin ()); pos! = Tok2.end (); + +POS) $Std::cout <<"["<< *pos <<"]"; $Std::cout <<Std::endl; - // [][!!] [Hello] [|] [World] [|] [][|] [][foo][][bar][yow][baz][|] [] - } the - //4. Escaped_list_separatorWuyi { theSTD::stringstr ="field 1,\ "Putting quotes around fields, allows Commas\", Field 3"; - Wutokenizer<escaped_list_separator<Char> >Tok (str); - for(Boost_auto (POS, Tok.begin ()); pos! 
= Tok.end (); + +POS) AboutStd::cout <<"["<< *pos <<"]"; $Std::cout <<Std::endl; - //[Field 1][putting quotes around fields, allows Commas][field 3] - //commas within quotation marks are not allowed as delimiters. - } A + //5. Offset_separator the { -STD::stringstr ="12252001400"; $ the intOffsets[] = {2,2,4}; theOffset_separator f (offsets, offsets +3); theTokenizer<offset_separator>Tok (str, f); the - for(Boost_auto (POS, Tok.begin ()); pos! = Tok.end (); + +POS) inStd::cout <<"["<< *pos <<"]"; theStd::cout <<Std::endl; the } About}
boost::tokenizer in detail