最近在寫一個桌面版的豆瓣電台程式,在解析其DJ電台的頻道列表時,從網頁中抓取到的js代碼如下:
channelInfo.dj = [{"channel_id":"l3","name":"誰的成名曲"},{"channel_id":"113044","name":"淺談輕唱","timestamp":"1333103365.0","update":0},{"channel_id":"115987","name":"爵士之音","timestamp":"1333952457.0","update":0},{"channel_id":"106379","name":"萌得耳松","timestamp":"1330493624.0","update":0},{"channel_id":"116154","name":"子寒Ann","timestamp":"1334220284.0","update":0},{"channel_id":"119300","name":"黑噪音","timestamp":"1332596519.0","update":0},{"channel_id":"103756","name":"周准","timestamp":"1310296419.0","update":0},{"channel_id":"113914","name":"蘇比","timestamp":"1334727058.0","update":0},{"channel_id":"118974","name":"iPlay","timestamp":"1332410472.0","update":0},{"channel_id":"104001","name":"有待","timestamp":"1322014846.0","update":0},{"channel_id":"l1","name":"Facelook"},{"channel_id":"123048","name":"宅電波","timestamp":"1329186348.0","update":0},{"channel_id":"113380","name":"擱料廣播","timestamp":"1335229294.0","update":0},{"channel_id":"104524","name":"FM O.U.R","timestamp":"1332233929.0","update":0},{"channel_id":"111880","name":"三角龍","timestamp":"1333598037.0","update":0},{"channel_id":"112177","name":"掌柜阿峻","timestamp":"1335334224.0","update":0},{"channel_id":"103547","name":"阿鵬","timestamp":"1317363315.0","update":0},{"channel_id":"106166","name":"離岸音樂電台","timestamp":"1329901317.0","update":0},{"channel_id":"119466","name":"Sorry! 
FM","timestamp":"1324311596.0","update":0},{"channel_id":"116241","name":"壞蛋調頻","timestamp":"1328582570.0","update":0},{"channel_id":"l2","name":"磁帶斷了"},{"channel_id":"105062","name":"ACHA","timestamp":"1333435761.0","update":0},{"channel_id":"110186","name":"公告牌之外","timestamp":"1319694482.0","update":0},{"channel_id":"121068","name":"棗聽樂紀","timestamp":"1325663931.0","update":0},{"channel_id":"114014","name":"Suburbs","timestamp":"1331610037.0","update":0},{"channel_id":"118673","name":"IndiePower","timestamp":"1333171142.0","update":0},{"channel_id":"121069","name":"666","timestamp":"1321876995.0","update":0},{"channel_id":"116351","name":"Rêveur Ailé","timestamp":"1329894116.0","update":0},{"channel_id":"120705","name":"糖蒜廣播","timestamp":"1319420901.0","update":0},{"channel_id":"117447","name":"SICK RADIO","timestamp":"1323311571.0","update":0},{"channel_id":"121379","name":"電音中國","timestamp":"1328175960.0","update":0},{"channel_id":"118962","name":"CT-808","timestamp":"1308483701.0","update":0}];
要解析這段代碼,涉及到幾個知識點:1.Regex,要在網頁中抓取相應的代碼,必須會使用Regex。2.JSON的還原序列化操作。
這裡主要寫一下自己在這兩天中碰到的第二個問題:
這段代碼不是典型的JSON對象,而是JSON對象數組。用JSON對象的解析方法不能達到預期想要的結果,程式在執行時總是拋出異常:“應為來自命名空間“”的元素“root”。遇到名稱為“”、命名空間為“”的“None”。”或“還原序列化對象 屬於類型 Doubaner.DBFM.DJChannel 時出現錯誤。遇到意外字元“c”。”。碰到這種錯誤,異常的提示訊息多半對於解決問題沒有多大的協助,MSDN上也沒有寫的很清楚。
無奈,花了兩天時間,也沒想出較好的解決方案。最後是在網上搜到的類似的程式中看到了別人的解決方案:
除了用於還原序列化的元素類(例如 DJChannel)之外,再建立一個繼承自 List<T> 泛型的集合類(T 即該元素類,例如 DJChannels : List<DJChannel>),並以這個集合類作為序列化器的目標類型,便可正確的將該段JSON數組還原序列化。
下面是關鍵的幾處代碼:
// Start from an empty list so that a failed parse still leaves a usable (empty) collection.
// DJChannels is the custom DJ channel list class, derived from List<DJChannel>.
DJChannels djc = new DJChannels();
try
{
    // The serializer targets the list type because the payload is a top-level JSON array.
    DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(DJChannels));

    // NOTE(review): match.Groups[1] is presumably the JSON array text captured by the regex — confirm against the pattern.
    byte[] jsonBytes = Encoding.UTF8.GetBytes(match.Groups[1].Value);

    // Dispose the stream deterministically instead of leaving it to the garbage collector.
    using (MemoryStream ms = new MemoryStream(jsonBytes))
    {
        DJChannels parsed = (DJChannels)ser.ReadObject(ms);

        // Keep the empty fallback list if the serializer yields no object (e.g. a JSON null payload).
        if (parsed != null)
        {
            djc = parsed;
        }
    }
}
catch (Exception ex)
{
    // Best-effort parsing: record the failure and continue with the empty list.
    LogUtil.Log(ex.ToString());
}
/// <summary>
/// One DJ channel entry of the channel list JSON.
/// </summary>
[DataContract]
internal class DJChannel
{
    /// <summary>
    /// Channel ID; bound to the JSON key "channel_id".
    /// </summary>
    [DataMember(Name = "channel_id")]
    public string channel { get; set; }

    /// <summary>
    /// Channel name; bound to the JSON key "name".
    /// </summary>
    [DataMember(Name = "name")]
    public string name { get; set; }

    /// <summary>
    /// Value of the JSON key "timestamp", kept as a string.
    /// </summary>
    [DataMember(Name = "timestamp")]
    public string timestamp { get; set; }

    /// <summary>
    /// Value of the JSON key "update", kept as a string.
    /// </summary>
    [DataMember(Name = "update")]
    public string update { get; set; }
}

/// <summary>
/// List of <see cref="DJChannel"/> items; serves as the serializer's target type
/// so that a top-level JSON array can be deserialized.
/// </summary>
internal class DJChannels : List<DJChannel>
{
}