Regular Expression Accumulation

Last Update: 2018-12-05 Source: Internet

Author: User

Tags: character classes

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on Alibaba Cloud. Read more >

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace RegularExpressionAccumulation
{
    /// <summary>
    /// A collection of small console demos for .NET regular-expression features:
    /// backreferences, word boundaries, lookahead/lookbehind, atomic (non-backtracking)
    /// groups, "does not contain" patterns, and the Singleline/Multiline options.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Entry point. Runs one demo; uncomment another call to run a different one.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            // Fun1();
            // Fun2();
            // Fun3();
            // Fun4();
            // Fun5();
            // Fun6();
            // Fun7();
            Fun8();
            // Regex reg = new Regex("^a.*b$");
            // string str = "aHello\nb";
            // Console.WriteLine(reg.Match(str).Value);
        }

        /// <summary>
        /// Group references (backreferences) and non-capturing groups.
        /// </summary>
        private static void Fun1()
        {
            string str = "'hello'";
            string str2 = "'Hello\"";

            // Inside a pattern, \N refers to the text captured by the N-th pair of parentheses.
            // Here \1 must repeat whatever quote character group 1 captured, so the pattern
            // only matches letters wrapped in the SAME kind of quote on both sides.
            Regex reg = new Regex("(['\"])[a-zA-Z]*\\1");

            // Succeeds: "hello" has a single quote on both sides.
            Match m = reg.Match(str);
            // Groups[0].Value is the whole match, i.e. the same as m.Value.
            Console.WriteLine(m.Groups[0].Value);
            // Groups[1].Value is the text captured by the first parentheses: '
            Console.WriteLine(m.Groups[1].Value);

            // Fails: the opening quote is ' but the closing one is ".
            Console.WriteLine(reg.IsMatch(str2));

            // In a replacement string, $N refers to the text captured by the N-th group.
            Console.WriteLine(reg.Replace("123'hello'456", "$1")); // result: 123'456

            // In both forms, groups are numbered by the position of their opening parenthesis,
            // counted from the left.

            // (?:...) groups without capturing, so it does not take part in the numbering.
            // \1 below therefore still refers to the quote group, not to (?:123),
            // and the match succeeds.
            Regex reg2 = new Regex("(?:123)*(['\"])[a-zA-Z]*\\1");
            Match m2 = reg2.Match(str);
            Console.WriteLine(reg2.IsMatch(str));
            Console.WriteLine(m2.Groups[0].Value);
            // (?:123) is not counted, so Groups.Count is 2 (whole match + the quote group).
            Console.WriteLine(m2.Groups.Count);
        }

        /// <summary>
        /// Matching at a position: the word-boundary anchor \b.
        /// </summary>
        private static void Fun2()
        {
            Regex reg = new Regex(@"\babc\b");
            string str1 = "I am abc";
            string str2 = "I amabc";
            Console.WriteLine(reg.IsMatch(str1)); // True: "abc" stands alone as a word
            Console.WriteLine(reg.IsMatch(str2)); // False: no boundary between "m" and "a"

            // The following does not work in JavaScript, where \w and \b only consider
            // ASCII word characters; in .NET they are Unicode-aware.
            // NOTE(review): the original non-ASCII literals were lost from this listing;
            // the CJK word U+4F60 U+597D ("hello") is used here as a reconstruction.
            Regex reg2 = new Regex(@"\b\u4F60\u597D\b");
            string str3 = "\u6211 \u4F60\u597D"; // word preceded by a space
            string str4 = "\u6211\u4F60\u597D";  // word glued to another CJK character
            Console.WriteLine(reg2.IsMatch(str3)); // True
            Console.WriteLine(reg2.IsMatch(str4)); // False
        }

        /// <summary>
        /// Positive and negative lookahead, and backreferences inside them.
        /// </summary>
        private static void Fun3()
        {
            // Positive lookahead: "Java" or "JavaScript" must be followed directly by a colon,
            // but the colon itself is not part of the match.
            Regex reg = new Regex(@"[Jj]ava([Ss]cript)?(?=\:)");
            string str = "Javascript:Good";
            Console.Write(reg.IsMatch(str) + "\t");
            Console.WriteLine(reg.Match(str).Value); // "Javascript" (colon not included)

            // No match: there is a space between "t" and the colon.
            Console.WriteLine(reg.IsMatch("Javascript :Good") + "\t");

            Console.Write(reg.IsMatch("Java:Good") + "\t"); // match
            Console.WriteLine(reg.Match("Java:Good").Value);

            // No match: there is a space between "a" and the colon.
            Console.WriteLine(reg.IsMatch("Java :Good") + "\t");

            // Negative lookahead: "java" followed by anything except "script", case-insensitive.
            Regex reg2 = new Regex("Java(?!Script)(.*)", RegexOptions.IgnoreCase);
            // Matches: "scrip" is not the full word "script".
            Console.WriteLine(reg2.IsMatch("Javascrip") + "\t" + reg2.Match("Javascrip").Value);
            // No match: "Java" is immediately followed by "script".
            Console.WriteLine(reg2.IsMatch("Javascript is good") + "\t" + reg2.Match("Javascript is good").Value);
            // Matches: "Java" is followed by a space, not by "Script".
            Console.WriteLine(reg2.IsMatch("Java Script is good") + "\t" + reg2.Match("Java Script is good").Value);

            // Can a backreference be used inside a negated character class?
            // The intent is: a quote, "hello", then any character that is NOT that same quote,
            // which should not match strEnd. But inside [...] \1 is not a backreference
            // (it is read as a character escape), so the pattern matches anyway.
            Regex reg3 = new Regex("(['\"])hello[^\\1]");
            string strEnd = "'hello'";
            string strEnd2 = "'hello\"";
            Console.WriteLine(reg3.IsMatch(strEnd)); // True, contrary to the intent

            // Can a backreference be used inside a negative lookahead? Yes:
            Regex regEnd = new Regex("(['\"])hello(?!\\1)");
            // Fails, which shows the backreference is honoured inside the lookahead.
            Console.WriteLine(regEnd.IsMatch(strEnd));
            // Succeeds, but the " after hello is not part of the match (lookahead consumes nothing).
            Console.WriteLine(regEnd.IsMatch(strEnd2) + "\t" + regEnd.Match(strEnd2).Value);
            // Succeeds, and the trailing " is consumed by .+
            Regex regEnd2 = new Regex("(['\"])hello(?!\\1).+");
            Console.WriteLine(regEnd2.IsMatch(strEnd2) + "\t" + regEnd2.Match(strEnd2).Value);
        }

        /// <summary>
        /// Backtracking versus atomic (non-backtracking) groups.
        /// </summary>
        private static void Fun4()
        {
            #region First example
            string str = "www.csdn.net";
            Regex reg1 = new Regex(@"\w+\.(.*)\.\w+");
            Console.WriteLine(reg1.IsMatch(str)); // match succeeds

            Regex reg2 = new Regex(@"\w+\.(?>.*)\.\w+");
            Console.WriteLine(reg2.IsMatch(str)); // match fails

            /*
             * Why does "www.csdn.net" match \w+\.(.*)\.\w+ but not \w+\.(?>.*)\.\w+ ?
             *
             * Quantifiers are greedy: in both patterns .* first swallows "csdn.net".
             * In the first pattern the engine then finds nothing left for \.\w+, so it
             * backtracks: .* gives characters back one at a time until \.\w+ can match,
             * and the whole expression succeeds.
             * In the second pattern (?>...) is an atomic group: once .* has matched, the
             * engine may not backtrack into it, so \.\w+ can never match and the whole
             * expression fails.
             *
             * Backtracking costs time. Where possible, write patterns that succeed without
             * it; the example above can be rewritten as \w+\.([^\.]+\.)+\w+
             */
            #endregion

            #region Second example
            string x = "Live for nothing,die for something";
            Regex r1 = new Regex(@".*thing,");
            if (r1.IsMatch(x))
            {
                Console.WriteLine("match:" + r1.Match(x).Value); // output: Live for nothing,
            }

            Regex r2 = new Regex(@"(?>.*)thing,");
            if (r2.IsMatch(x)) // does not match
            {
                Console.WriteLine("match:" + r2.Match(x).Value);
            }
            else
            {
                Console.WriteLine("mismatch");
            }
            // In r1 the greedy ".*" first runs to the end of the string and then gives
            // characters back until "thing," can match, which succeeds at "nothing,".
            // In r2 the atomic group forbids giving anything back, so the match fails.
            #endregion
        }

        /// <summary>
        /// Two ways to express "does not contain a given substring".
        /// </summary>
        private static void Fun5()
        {
            #region First approach: tempered dot
            string str = "Java script is good";
            string strP1 = "<p>Helo World</p>";
            string strP2 = "<p>Hello<div>Realy</div></p>";

            // Match text that starts with "java" and does not contain "script" afterwards.
            // 1. The quantifier must be OUTSIDE the group: ((?!script).)* checks the lookahead
            //    before every single character. ((?!script).*) would check it only once and
            //    then let the greedy .* run straight over any later "script".
            // 2. The trailing $ matters. With $, the tempered dot has to reach the end of the
            //    input, so any "script" after "java" makes the match fail. Without $, the
            //    pattern simply stops in front of "script" and still succeeds
            //    (it would match "Java " in this example).
            Regex regMost = new Regex("java((?!script).)*$", RegexOptions.IgnoreCase);
            Console.WriteLine(regMost.IsMatch(str)); // False

            // To also capture the matched text, e.g. the content of a <p> element that must not
            // contain a <div>, wrap ((?!<div>).)* in one more pair of parentheses.
            Regex regP = new Regex("<p>(((?!<div>).)*)</p>");

            Match mp = regP.Match(strP1);
            // Match succeeds.
            if (mp.Success)
            {
                Console.WriteLine(mp.Groups[1].Value);
            }

            // Match fails: strP2 contains <div> inside <p>.
            if (!regP.IsMatch(strP2))
            {
                Console.WriteLine("unmatched" + strP2);
            }
            #endregion

            #region Second approach: a single lookahead over the rest of the input
            // The same idea applied to the "java"/"script" case would be:
            //   new Regex("Java(?!.*script.*)", RegexOptions.IgnoreCase)
            // Another related pattern:
            //   new Regex("(<p>((?!<\\/p>).)*<\\/p>)|(<br\\/>)")

            Regex myReg2 = new Regex("<p>(?!.*<div>.*)(.*)</p>");
            Match myRes = myReg2.Match(strP1);
            if (myRes.Success)
            {
                Console.WriteLine(myRes.Groups[1].Value);
            }

            if (!myReg2.IsMatch(strP2))
            {
                Console.WriteLine("unmatched" + strP2);
            }
            #endregion
        }

        /// <summary>
        /// A regular expression that can never match, and a similar one that can.
        /// </summary>
        private static void Fun6()
        {
            Regex reg = new Regex("q(?=u)i");
            string s = "quit";
            // Never matches: the lookahead requires "u" next but consumes nothing,
            // so the following "i" is tested against that same "u".
            Console.WriteLine(reg.IsMatch(s));

            // This one can match: after the lookahead confirms "4", \d consumes it.
            Regex reg2 = new Regex(@"123(?=4)\dabc");
            Console.WriteLine(reg2.IsMatch("1234abc"));
        }

        /// <summary>
        /// Positive and negative lookbehind.
        /// </summary>
        private static void Fun7()
        {
            string str = "abc123mm";
            // Positive lookbehind: "mm" must be preceded by "123".
            Regex reg = new Regex("(?<=123)mm");
            Console.WriteLine(reg.IsMatch(str)); // match succeeds
            if (reg.IsMatch(str))
            {
                Console.WriteLine(reg.Match(str).Value);
            }
            Console.WriteLine(reg.IsMatch("23mm")); // match fails
            Console.WriteLine("--------------------------------------------------------");
            // Negative lookbehind: "mm" must NOT be preceded by "123".
            Regex reg2 = new Regex("(?<!123)mm");
            Console.WriteLine(reg2.IsMatch("abc123mm")); // match fails
            Console.WriteLine(reg2.IsMatch("abcmm")); // match succeeds
            Console.WriteLine(reg2.Match("abcmm").Value);
        }

        /// <summary>
        /// Default mode versus RegexOptions.Singleline and RegexOptions.Multiline.
        /// </summary>
        private static void Fun8()
        {
            // 1. Default mode (neither Singleline nor Multiline).
            Console.WriteLine(Regex.IsMatch("a\nb", "a.{1}b")); // "." does not match a line break
            // In default mode ^ matches only at the start of the string; $ matches at the very
            // end of the string or just before a trailing newline at the end.
            Console.WriteLine(Regex.IsMatch("a\nb", "^a\nb$")); // ^ = start of string, $ = end of string
            Console.WriteLine(Regex.IsMatch("\nab", "^ab")); // ^ does not match at the start of a line
            Console.WriteLine(Regex.IsMatch("ab\n", "ab$")); // $ matches before the final newline

            // 2. Singleline mode: only the meaning of "." changes.
            Console.WriteLine(Regex.IsMatch("a\nb", "a.{1}b", RegexOptions.Singleline)); // "." matches the line break
            Console.WriteLine(Regex.IsMatch("a\nb", "^a.{1}b$", RegexOptions.Singleline)); // ^ = start of string, $ = end of string
            Console.WriteLine(Regex.IsMatch("\nab", "^ab", RegexOptions.Singleline)); // ^ still does not match at the start of a line
            Console.WriteLine(Regex.IsMatch("ab\n", "ab$", RegexOptions.Singleline)); // $ matches before the final newline

            // 3. Multiline mode: only the meaning of ^ and $ changes.
            Console.WriteLine(Regex.IsMatch("a\nb", "a.{1}b", RegexOptions.Multiline)); // "." still does not match a line break
            Console.WriteLine(Regex.IsMatch("ab", "^ab$", RegexOptions.Multiline)); // ^ = start of string, $ = end of string
            Console.WriteLine(Regex.IsMatch("\nab", "^ab", RegexOptions.Multiline)); // ^ matches at the start of a line
            Console.WriteLine(Regex.IsMatch("ab\n", "ab$", RegexOptions.Multiline)); // $ matches at the end of a line
            Console.WriteLine(Regex.IsMatch("\nab\n", "^ab$", RegexOptions.Multiline)); // ^ = start of a line, $ = end of a line
        }
    }
}

This article is an English version of an article which is originally in the Chinese language on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership, or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More