最近由於工作項目中的需要,開始關注起了正則。初識正則,看著著急的同時又一頭霧水……還好現在工作已經告一段落了,非常感謝論壇裡面前輩的協助,當然也有自身的學習。O(∩_∩)O
工作之餘,把所用到的正則簡單的整理了下,備忘,同時也給遇到類似問題的童鞋做個參考。
規則:
1.根據id擷取標籤裡面的內容
2.擷取頁面中的img及img裡面的屬性src的內容
3.擷取所有的script
4.特殊取值:如取 DATA.groupList = [{...}] 裡面的內容
廢話不多說了,直接貼代碼了。
下面這個是要用到的一個字串:
<img class="avatar" src="/cgi-bin/getheadimg?fakeid=2391433120&r=835720"><script type="text/javascript">WXM.DATA.userinfo = {};WXM.DATA = {ROOT : WXM.ROOT,userinfo : {NickName : "yoyo",FakeID : "2391433120"},nav : [{_id: 'home',name : "首頁", link : '/cgi-bin/indexpage?t=wxm-index&lang=zh_CN'}], };</script> <script type="json" id="json-setting">{"username":"haha","signature":"","country":"中國","province":"上海","city":"浦東新區","verifyInfo":"ready go?","bindUserName":""}</script> <script type="text/javascript">window.WXM && (function(WXM, jq, win){ DATA.title = "使用者管理"; DATA.groupList = [ { id: '0', name: defaultGroupName[0] || "預設組", num : '0'*1 }, { id: '100', name: defaultGroupName[100] || "F", num : '2'*1 } ]; })(WXM, jQuery, window);</script> <script type="text/javascript">WXM.DATA.userinfo = {};WXM.DATA = {ROOT : WXM.ROOT,userinfo : {NickName : "yoyo",FakeID : "2391433120"},nav : [{_id: 'home',name : "首頁", link : '/cgi-bin/indexpage?t=wxm-index&lang=zh_CN'}]
};</script>
完整的代碼如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;

/// <summary>
/// Demo page that runs a few regular expressions against a sample HTML fragment
/// (built by <see cref="GetHtm"/>) and writes the matched text to the response.
/// </summary>
public partial class zz : System.Web.UI.Page
{
    // Options shared by every regex call in this page. Singleline lets '.' cross
    // the line breaks inside the sample HTML; IgnoreCase makes tag names match
    // regardless of case.
    private const RegexOptions MatchOptions =
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Singleline;

    /// <summary>
    /// Extracts the element whose id is "json-setting" and writes it to the response,
    /// then writes every script element found by <see cref="Fn"/>.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // Find the tag with id="json-setting" together with its content.
        // The match here is a <script> element, so the browser will not render it
        // as text; view the page source to see the output.
        string pattern = @"<(?<HtmlTag>[\w]+)[^>]*\s[iI][dD]=(?<Quote>[""']?)json-setting(?(Quote)\k<Quote>)[""']?[^>]*>((?<Nested><\k<HtmlTag>[^>]*>)|</\k<HtmlTag>>(?<-Nested>)|.*?)*</\k<HtmlTag>>";

        // Alternative: the content inside DATA.groupList = [ ... ] (balanced brackets).
        //string pattern = @"(?<=DATA\.groupList\s*=\s*\[)((?<g>\[)|(?<-g>\])|[^\]\[])*(?(g)(?!))(?=\])";

        // Find every <script> element.
        string pattern1 = @"<script[^>]*?>.*?</script>|<script[^>]*>[\d\D]*?</script>";

        // Alternatives for images:
        //string pattern2 = "<img class=\"(.*?)\".*/>";   // select the img tag
        //string pattern2 = "src=\"(?<value>.*?)\"";      // capture the value of the img src attribute

        // Build the sample HTML once and reuse it for both searches.
        string html = GetHtm();

        // Only the first match is reported, so Regex.Match is enough.
        Match first = Regex.Match(html, pattern, MatchOptions);
        if (first.Success)
        {
            Response.Write("擷取成功!" + first.Value);
        }
        else
        {
            Response.Write("擷取失敗!");
        }

        Fn(html, pattern1);
        //Fn(html, pattern2);
    }

    /// <summary>
    /// Writes all matches of <paramref name="pattern"/> in <paramref name="strhtm"/>
    /// to the response, concatenated after a success prefix; writes a failure
    /// message when there is no match.
    /// </summary>
    /// <param name="strhtm">Text to search.</param>
    /// <param name="pattern">Regular expression to apply.</param>
    public void Fn(string strhtm, string pattern)
    {
        MatchCollection matches = Regex.Matches(strhtm, pattern, MatchOptions);
        if (matches.Count == 0)
        {
            Response.Write("擷取失敗!");
            return;
        }

        // StringBuilder avoids re-copying the accumulated text on every match.
        StringBuilder combined = new StringBuilder("擷取成功!");
        foreach (Match match in matches)
        {
            combined.Append(match.Value);
            // To collect only the img src values (with the src pattern), append
            // match.Groups["value"].Value instead.
        }

        Response.Write(combined.ToString());
    }

    /// <summary>
    /// Builds the sample HTML fragment used by the demo: one img tag followed by
    /// four script elements, each source line terminated by a line break.
    /// </summary>
    /// <returns>The sample HTML text.</returns>
    public string GetHtm()
    {
        StringBuilder sb = new StringBuilder();
        sb.AppendLine("<img class=\"avatar\" src=\"/cgi-bin/getheadimg?fakeid=2391433120&r=835720\" />");
        sb.AppendLine("<script type=\"text/javascript\">");
        sb.AppendLine("WXM.DATA.userinfo = {};");
        sb.AppendLine("WXM.DATA = {");
        sb.AppendLine("ROOT : WXM.ROOT,");
        sb.AppendLine("userinfo : {");
        sb.AppendLine("NickName : \"yoyo\",");
        sb.AppendLine("FakeID : \"2391433120\"");
        sb.AppendLine("},");
        sb.AppendLine("nav : [");
        sb.AppendLine("{");
        sb.AppendLine("_id: 'home',");
        sb.AppendLine("name : \"首頁\", ");
        sb.AppendLine("link : '/cgi-bin/indexpage?t=wxm-index&lang=zh_CN'");
        sb.AppendLine("}],");
        sb.AppendLine("");
        sb.AppendLine("};");
        sb.AppendLine("");
        sb.AppendLine("</script>");
        sb.AppendLine(" <script type=\"json\" id=\"json-setting\">");
        // The JSON is deliberately split across two lines so the id-based pattern
        // is exercised on content that spans a line break.
        sb.AppendLine("{\"username\":\"haha\",\"signature\":\"\",\"country\":\"中國\",\"province\":\"上海\",\"city\":\"浦東新");
        sb.AppendLine("區\",\"verifyInfo\":\"ready go?\",\"bindUserName\":\"\"}</script>");
        sb.AppendLine("");
        sb.AppendLine(" <script type=\"text/javascript\">");
        sb.AppendLine("window.WXM && (function(WXM, jq, win){");
        sb.AppendLine("");
        sb.AppendLine(" DATA.title = \"使用者管理\";");
        sb.AppendLine(" DATA.groupList = [");
        sb.AppendLine(" {");
        sb.AppendLine(" id: '0',");
        sb.AppendLine(" name: defaultGroupName[0] || \"預設組\",");
        sb.AppendLine(" num : '0'*1");
        sb.AppendLine(" }, {");
        sb.AppendLine(" id: '100',");
        sb.AppendLine(" name: defaultGroupName[100] || \"F\",");
        sb.AppendLine(" num : '2'*1");
        sb.AppendLine(" } ");
        sb.AppendLine(" ];");
        sb.AppendLine(" ");
        sb.AppendLine("})(WXM, jQuery, window);");
        sb.AppendLine("</script>");
        sb.AppendLine("");
        sb.AppendLine(" <script type=\"text/javascript\">");
        sb.AppendLine("WXM.DATA.userinfo = {};");
        sb.AppendLine("WXM.DATA = {");
        sb.AppendLine("ROOT : WXM.ROOT,");
        sb.AppendLine("");
        sb.AppendLine("userinfo : {");
        sb.AppendLine("NickName : \"yoyo\",");
        sb.AppendLine("FakeID : \"2391433120\"");
        sb.AppendLine("},");
        sb.AppendLine("nav : [");
        sb.AppendLine("{");
        sb.AppendLine("_id: 'home',");
        sb.AppendLine("name : \"首頁\", ");
        sb.AppendLine("link : '/cgi-bin/indexpage?t=wxm-index&lang=zh_CN'");
        sb.AppendLine("}");
        sb.AppendLine("]");
        sb.AppendLine("};</script> ");
        return sb.ToString();
    }
}