Grammar rules:
/* From http://www.w3.org/TR/xmlschema-2/#regexs */
// [1] Regexp: = Branch ('| 'Branch )*
Re_reg_exp = re_branch [push_back (at_c <0> (_ Val), _ 1)] % '| ';
// [2] branch: = piece *
Re_branch = * (re_piece) [push_back (at_c <0> (_ Val), _ 1)];
// [3] Piece: = atom quantifier?
Re_piece = re_atom [at_c <0> (_ Val) = _ 1]>-(re_quantifier) [at_c <1> (_ Val) = _ 1];
// [4] quantifier ::= [? * +] | ('{'Quantity '}')
Re_quantifier = Qi: Char _("? * + ") | (Qi: Char _ ('{')> re_quantity> Qi: Char _('}'));
// [5] quantity: = quantrange | quantmin | quantexact
Re_quantity = re_quant_range | re_quant_exact;
// [6] quantrange: = quantexact ', 'quantexact
Re_quant_range = re_quant_exact> Qi: Char _ (',')>-re_quant_exact;
// [7] quantmin: = quantexact ','
// Note: Merge rule #7 into rule #6
// Re_quant_min = re_quant_exact> Qi: Char _(',');
// [8] quantexact: = [0-9] +
Re_quant_exact = + (Qi: Char _ ("0-9 "));
// [9] ATOM: = char | charclass | ('('regexp ')')
Re_atom = re_char [_ Val = _ 1]
| Re_char_class [_ Val = _ 1]
| (Qi: Char _ (')> re_reg_exp [_ Val = _ 1]> Qi: Char _(')'));
// [10] CHAR: = [^ .\? * + {} () | ^ $ # X5b # x5d]
// Note: exclude |, separator of branches, as well
Re_char = Qi: Char _-".\\? * + $ "-'('-')'-'['-']'-'| ';
// [11] charclass: = charclassesc | charclassexpr | wildcardesc
Re_char_class = re_char_class_esc [_ Val = _ 1]
| Re_char_class_expr [_ Val = _ 1]
| Re_wildcard_esc [_ Val = _ 1];
// [12] charclassexpr: = '['chargroup']'
// Note: No skip in 'X' Mode
Re_char_class_expr = Qi: Char _ ('[')> re_char_group [at_c <0> (_ Val) = _ 1]> Qi :: char _ (']');
// [13] chargroup: = poschargroup | negchargroup | charclasssub
Re_char_group = re_pos_char_group [_ Val = _ 1]
| Re_neg_char_group [_ Val = _ 1]
| Re_char_class_sub [_ Val = _ 1];
// [14] poschargroup: = (charrange | charclassesc) +
Re_pos_char_group = + (re_char_range | re_char_class_esc) [push_back (at_c <0> (_ Val), _ 1)];
// [15] negchargroup: = '^' poschargroup
Re_neg_char_group = Qi: Char _ ('^')> re_pos_char_group;
// [16] charclasssub: = (poschargroup | negchargroup) '-'charclassexpr
Re_char_class_sub = (re_pos_char_group [at_c <0> (_ Val) = _ 1]
| Re_neg_char_group [at_c <1> (_ Val) = _ 1])
> Qi: Char _('-')
> Re_char_class_expr [at_c <2> (_ Val) = _ 1];
// [17] charrange: = serange | xmlcharincdash
Re_char_range = re_xml_char_inc_dash | re_se_range;
// [18] serange: = charoresc '-'charoresc
Re_se_range = re_char_or_esc> Qi: Char _ ('-')> re_char_or_esc;
// There's no 19th rule
// [20] charoresc: = xmlchar | singlecharesc
Re_char_or_esc = re_xml_char | re_single_char_esc;
// [21] xmlchar: = [^ \ # x2d # x5b # x5d]
Re_xml_char = Qi: Char _-'\'-'['-']'-';
// [22] xmlcharincdash ::= [^ \ # x5b # x5d]
Re_xml_char_inc_dash = Qi: Char _-'\'-'['-']';
// [23] charclassesc: = (singlecharesc | multicharesc | catesc | complesc)
Re_char_class_esc = re_cat_esc | re_compl_esc |
(Qi: Char _ ('\')> Qi: Char _ ("NRT \\.? * +-^ [] () Ssiiccddww "));
// [24] singlecharesc: = '\' [NRT \ | .? * + () {}# X2d # x5b # x5d # x5e]
Re_single_char_esc = Qi: Char _ ('\')> Qi: Char _ ("NRT \\.? * +-^ [] () ");
// [25] catesc: = '\ P {'charprop '}'
Re_cat_esc = Qi: string ("\ P {")> re_char_prop> '}';
// [26] complesc: = '\ P {'charprop '}'
Re_compl_esc = Qi: string ("\ P {")> re_char_prop> '}';
// [27] charprop: = iscategory | isblock
Re_char_prop = re_is_category | re_is_block;
// [28] iscategory: = Letters | marks | numbers | punctuation | separators | symbols | others
Re_is_category = re_letters | re_marks | re_numbers | re_punctuation | re_separators | re_symbols | re_others;
// [29] letters: = 'l' [ultmo]?
Re_letters = Qi: Char _ ('l')>-Qi: Char _ ("ultmo ");
// [30] marks: = 'M' [nce]?
Re_marks = Qi: Char _ ('M')>-Qi: Char _ ("nce ");
// [31] numbers: = 'n' [DLO]?
Re_numbers = Qi: Char _ ('n')>-Qi: Char _ ("DLO ");
// [32] punctuation: = 'P' [cdseifo]?
Re_punctuation = Qi: Char _ ('P')>-Qi: Char _ ("cdseifo ");
// [33] separators: = 'Z' [SLP]?
Re_separators = Qi: Char _ ('Z')>-Qi: Char _ ("SLP ");
// [34] symbols: ='s '[mcko]?
Re_symbols = Qi: Char _ ('s ')>-Qi: Char _ ("mcko ");
// [35] others: = 'C' [cfon]?
Re_others = Qi: Char _ ('C')>-Qi: Char _ ("cfon ");
// [36] isblock: = 'is '[a-zA-Z0-9 # x2d] +
Re_is_block = Qi: string ("is")> + (Qi: Char _ ("a-zA-Z0-9") | Qi: Char _('-'));
// [37] multicharesc: = '\' [ssiiccddww]
Re_multi_char_esc = Qi: Char _ ('\')> Qi: Char _ ("ssiiccddww ");
// [37a] wildcardesc: = '.'
Re_wildcard_esc = Qi: Char _('.');
Boost.Spirit parser for XQuery regular expressions