利用Regex將html網頁資料變成Web Service

來源:互聯網
上載者:User
web|資料|網頁|正則 這次的題目很簡單,中國銀行有一個查當天匯率的網頁(http://www.bank-of-china.com/info/qpindex.shtml),不過是傳統的Html格式,而其又沒有提供Xml格式或者WebService查詢。現在如果希望其他的資訊系統能夠隨時讀取其中的資料,那麼方便的莫過於中行提供一個WebService介面供大家調用,這也是典型的安全的WebService應用。可惜中行沒有做,那麼我們能不能自己來做呢?當然可以,只要用程式分析其html網頁,那麼就可以很容易的讀取其中的資料。文本分析,當然要看我們的"Regular Expression"(呵呵,其實這才是寫這個程式的真實目的 -- 應用Regex。)

中行的頁面類似於:

日期:2004/09/30 有效期間至2004/10/07


| 貨幣名稱 | 現匯買入價 | 現鈔買入價 | 賣出價 | 基準價 |
| --- | --- | --- | --- | --- |
| 英鎊 | 1488.1700 | 1453.1500 | 1492.6400 | |
| 港幣 | 105.9700 | 105.3300 | 106.2900 | 106.1100 |
| 美元 | 826.4200 | 821.4500 | 828.9000 | 827.6600 |
| 瑞士法郎 | 655.9300 | 641.1400 | 659.2200 | |
| 新加坡元 | 488.7600 | 477.2600 | 490.2300 | |
| 瑞典克朗 | 112.4900 | 109.8400 | 112.8300 | |
| 丹麥幣 | 136.5900 | 133.3700 | 137.0000 | |
| 挪威克朗 | 121.9500 | 119.0800 | 122.3100 | |
| 日元 | 7.4344 | 7.3785 | 7.4717 | 7.4519 |
| 加拿大幣 | 650.8000 | 635.4800 | 652.7600 | |
| 澳大利亞元 | 591.9900 | 578.6400 | 594.9600 | |
| 歐元 | 1019.6400 | 1010.9600 | 1022.7000 | 1019.7000 |
| 澳門元 | 103.2200 | 102.6000 | 103.5300 | |
| 菲律賓比索 | 14.6700 | 14.3300 | 14.7200 | |
| 泰國銖 | 19.9000 | 19.4300 | 19.9600 | |
| 紐西蘭幣 | 553.7000 | | 555.3600 | |


對其程式碼分析後,給出了一個Regex,當然這個運算式還不完善,但是針對目前比較固定的中行的匯率頁面來說,暫時還沒有問題。

@"<tr bgcolor='#\w+' ><td height='20'>(?<currency>.*)</td>\s*" +
@"<td height='20'><p align='right'>(?<bankbuytt>\d*.?\d*)( )+.?</td>\s*" +
@"<td height='20'><p align='right'>(?<buynotes>\d*.?\d*)( )+.?</td>\s*" +
@"<td height='20'><p align='right'>(?<sell>\d*.?\d*)( )+.?</td>\s*" +
@"<td height='20'><p align='right'>(?<base>\d*.?\d*)( )+.?</td>\s*"


然後過濾就非常簡單了。我一直以為代碼是最好的說明,特別是對於優雅的語言來說,因此我就不多說了,代碼伺候。

這是所建WebService頁面ForeignExchange.asmx的代碼:

using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Web;
using System.Net;
using System.Web.Services;
using System.Xml;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;

namespace ChinaBank
{
    /// <summary>
    /// Web service that downloads the Bank of China exchange-rate HTML page,
    /// extracts one record per currency row with a regular expression, and
    /// returns the result as an XML document, a DataSet, or the raw page text.
    /// </summary>
    [WebService(Namespace="http://dancefires.com/ChinaBank/")]
    public class ForeignExchange : System.Web.Services.WebService
    {
        /// <summary>
        /// Address of the HTML page the rates are scraped from. Shared by
        /// <see cref="GetBankPage"/> and <see cref="getXmlDoc"/> so both always
        /// read the same page.
        /// </summary>
        private const string RatesPageUrl = "http://www.bank-of-china.com/info/whjrpj.html";

        /// <summary>
        /// Pattern matching one table row: a currency name cell followed by four
        /// right-aligned numeric cells (any of which may be empty).
        /// </summary>
        /// <remarks>
        /// Built once and shared: a compiled Regex is expensive to construct, and
        /// Regex instances are safe to use for matching from several threads.
        /// The decimal point is escaped so that only a literal '.' is accepted
        /// inside a number, and the currency group is lazy so that it stops at
        /// the first closing cell instead of running across several rows that
        /// happen to sit on one line.
        /// NOTE(review): the "( )+.?" tail after each number is kept exactly as in
        /// the original; presumably it skips padding after the value — confirm
        /// against the live markup.
        /// </remarks>
        private static readonly Regex RateRowPattern = new Regex(
            @"<tr bgcolor='#\w+' ><td height='20'>(?<currency>.*?)</td>\s*" +
            @"<td height='20'><p align='right'>(?<bankbuytt>\d*\.?\d*)( )+.?</td>\s*" +
            @"<td height='20'><p align='right'>(?<buynotes>\d*\.?\d*)( )+.?</td>\s*" +
            @"<td height='20'><p align='right'>(?<sell>\d*\.?\d*)( )+.?</td>\s*" +
            @"<td height='20'><p align='right'>(?<base>\d*\.?\d*)( )+.?</td>\s*"
            , RegexOptions.Compiled);

        public ForeignExchange()
        {
            //CODEGEN: This call is required by the ASP.NET Web Services Designer
            InitializeComponent();
        }

        #region Component Designer generated code

        //Required by the Web Services Designer
        private IContainer components = null;

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
        }

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">
        /// True when called from Dispose(); the designer container is released
        /// only in that case.
        /// </param>
        protected override void Dispose( bool disposing )
        {
            if(disposing && components != null)
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #endregion

        /// <summary>
        /// Returns the scraped rates as an XML document backed by the
        /// "Exchange" DataSet.
        /// </summary>
        [WebMethod]
        public XmlDataDocument GetForeignExchangeRates()
        {
            return getXmlDoc();
        }

        /// <summary>
        /// Returns the scraped rates as a DataSet containing the single table
        /// "ForeignExchange".
        /// </summary>
        [WebMethod]
        public DataSet GetForeignExchangeRatesDataSet()
        {
            return getXmlDoc().DataSet;
        }

        /// <summary>
        /// Returns the raw text of the bank's rate page, decoded as GB2312.
        /// </summary>
        [WebMethod]
        public string GetBankPage()
        {
            return getWebContent( RatesPageUrl );
        }

        // private methods

        /// <summary>
        /// Downloads <paramref name="url"/> and decodes the bytes as GB2312 text.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The page content as a string.</returns>
        private string getWebContent( string url )
        {
            using( WebClient client = new WebClient() )
            {
                byte[] buffer = client.DownloadData( url );
                return Encoding.GetEncoding("GB2312").GetString( buffer, 0, buffer.Length );
            }
        }

        /// <summary>
        /// Converts the text captured for one numeric cell into the stored value.
        /// </summary>
        /// <param name="cell">Regex group captured for the cell.</param>
        /// <returns>
        /// The parsed number divided by 100, or 0 when the cell is empty or does
        /// not hold a number (for example a lone ".").
        /// </returns>
        /// <remarks>
        /// Parsing uses the invariant culture: the page always writes '.' as the
        /// decimal separator, whereas the culture-sensitive conversion would
        /// misread "1488.1700" on a server configured with a comma-decimal culture.
        /// NOTE(review): the division by 100 presumably reflects that the page
        /// quotes prices per 100 units of foreign currency — confirm.
        /// </remarks>
        private static double parseRate( Group cell )
        {
            string text = cell.Value;
            if( text.Length == 0 )
            {
                return 0;
            }

            double quoted;
            bool parsed = double.TryParse(
                text,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out quoted );
            return parsed ? quoted / 100 : 0;
        }

        /// <summary>
        /// Downloads the rate page and builds the "Exchange" DataSet from every
        /// row matched by <see cref="RateRowPattern"/>.
        /// </summary>
        /// <returns>
        /// An XmlDataDocument bound to the DataSet; the table "ForeignExchange"
        /// has the columns Currency, BankBuyTT, BankBuyNotes, BankSell and
        /// Baseline. Cells without a quote are stored as 0.
        /// </returns>
        private XmlDataDocument getXmlDoc()
        {
            string webcontent = getWebContent( RatesPageUrl );

            // Prepare the DataSet
            DataSet ds = new DataSet("Exchange");
            DataTable dt = new DataTable("ForeignExchange");
            ds.Tables.Add( dt );
            dt.Columns.Add( "Currency", typeof(string) );
            dt.Columns.Add( "BankBuyTT", typeof(double) );
            dt.Columns.Add( "BankBuyNotes", typeof(double) );
            dt.Columns.Add( "BankSell", typeof(double) );
            dt.Columns.Add( "Baseline", typeof(double) );
            // The document is attached before rows are added, as in the original.
            XmlDataDocument xmldoc = new XmlDataDocument( ds );

            for( Match m = RateRowPattern.Match(webcontent) ; m.Success ; m = m.NextMatch() )
            {
                DataRow row = dt.NewRow();
                row["Currency"] = m.Groups["currency"].Value;
                row["BankBuyTT"] = parseRate( m.Groups["bankbuytt"] );
                row["BankBuyNotes"] = parseRate( m.Groups["buynotes"] );
                row["BankSell"] = parseRate( m.Groups["sell"] );
                row["Baseline"] = parseRate( m.Groups["base"] );
                dt.Rows.Add( row );
            }
            return xmldoc;
        }
    }
}

用戶端也很容易,只要用wsdl產生了相應的WebService Proxy後,直接調用就行了,由於我讓Server端返回了DataSet,因此用戶端直接用DataGrid來顯示DataSet即可,非常Easy,在這個問題上用戶端沒有什麼技術關鍵點。

using System;
using System.Threading;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;

namespace BankDataClient
{
    /// <summary>
    /// Main window of the client: lets the user enter the web service address,
    /// calls the service through the generated proxy and shows the returned
    /// "ForeignExchange" table in a DataGrid.
    /// </summary>
    public class frmMainBankRates : System.Windows.Forms.Form
    {
        /// <summary>
        /// Service address used when the application configuration does not
        /// supply a usable "url" setting.
        /// </summary>
        private const string DefaultServiceUrl = "http://www.dancefires.com/ChinaBank/ForeignExchange.asmx";

        private System.Windows.Forms.DataGrid dataGrid1;
        private System.Windows.Forms.Button btnConnect;
        private System.Data.DataSet ds;
        // Proxy class generated by wsdl for the ForeignExchange service.
        // NOTE(review): the published listing had a stray URL pasted into this
        // type name; "com.dancefires.www" is the reconstructed proxy namespace —
        // confirm against the generated proxy file.
        private BankDataClient.com.dancefires.www.ForeignExchange proxy = new BankDataClient.com.dancefires.www.ForeignExchange();
        private System.Windows.Forms.TextBox txtUrl;
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.Container components = null;

        /// <summary>
        /// Builds the form and points the proxy at the configured service
        /// address, falling back to <see cref="DefaultServiceUrl"/>.
        /// </summary>
        public frmMainBankRates()
        {
            //
            // Required for Windows Form Designer support
            //
            InitializeComponent();
            try
            {
                string configured = System.Configuration.ConfigurationSettings.AppSettings["url"];
                // A missing key yields null rather than an exception, so the
                // default is chosen explicitly instead of relying on the proxy
                // rejecting an empty address.
                if( configured == null || configured.Trim().Length == 0 )
                {
                    configured = DefaultServiceUrl;
                }
                txtUrl.Text = configured;
                proxy.Url = txtUrl.Text;
            }
            catch(Exception)
            {
                // Unreadable configuration or an address the proxy refuses:
                // fall back to the built-in default.
                proxy.Url = DefaultServiceUrl;
                txtUrl.Text = proxy.Url;
            }
        }

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">
        /// True when called from Dispose(); the designer container is released
        /// only in that case.
        /// </param>
        protected override void Dispose( bool disposing )
        {
            if( disposing )
            {
                if(components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose( disposing );
        }

        #region Windows Form Designer generated code
        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.dataGrid1 = new System.Windows.Forms.DataGrid();
            this.ds = new System.Data.DataSet();
            this.btnConnect = new System.Windows.Forms.Button();
            this.txtUrl = new System.Windows.Forms.TextBox();
            ((System.ComponentModel.ISupportInitialize)(this.dataGrid1)).BeginInit();
            ((System.ComponentModel.ISupportInitialize)(this.ds)).BeginInit();
            this.SuspendLayout();
            //
            // dataGrid1
            //
            this.dataGrid1.DataMember = "";
            this.dataGrid1.DataSource = this.ds;
            this.dataGrid1.HeaderForeColor = System.Drawing.SystemColors.ControlText;
            this.dataGrid1.Location = new System.Drawing.Point(32, 48);
            this.dataGrid1.Name = "dataGrid1";
            this.dataGrid1.Size = new System.Drawing.Size(480, 256);
            this.dataGrid1.TabIndex = 0;
            //
            // ds
            //
            this.ds.DataSetName = "Exchange";
            this.ds.Locale = new System.Globalization.CultureInfo("zh-CN");
            //
            // btnConnect
            //
            this.btnConnect.Location = new System.Drawing.Point(432, 16);
            this.btnConnect.Name = "btnConnect";
            this.btnConnect.TabIndex = 1;
            this.btnConnect.Text = "串連";
            this.btnConnect.Click += new System.EventHandler(this.btnConnect_Click);
            //
            // txtUrl
            //
            this.txtUrl.Location = new System.Drawing.Point(32, 16);
            this.txtUrl.Name = "txtUrl";
            this.txtUrl.Size = new System.Drawing.Size(384, 20);
            this.txtUrl.TabIndex = 2;
            this.txtUrl.Text = "";
            //
            // frmMainBankRates
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);
            this.ClientSize = new System.Drawing.Size(544, 318);
            this.Controls.Add(this.txtUrl);
            this.Controls.Add(this.btnConnect);
            this.Controls.Add(this.dataGrid1);
            this.Name = "frmMainBankRates";
            this.Text = "Foreign Exchange Rates of Bank of China";
            ((System.ComponentModel.ISupportInitialize)(this.dataGrid1)).EndInit();
            ((System.ComponentModel.ISupportInitialize)(this.ds)).EndInit();
            this.ResumeLayout(false);

        }
        #endregion

        /// <summary>
        /// Click handler of the connect button: reloads the grid from the service.
        /// </summary>
        private void btnConnect_Click(object sender, System.EventArgs e)
        {
            UpdateDataGrid();
        }

        /// <summary>
        /// Calls the service at the address typed into the text box and binds
        /// the returned "ForeignExchange" table to the grid.
        /// </summary>
        /// <remarks>
        /// The button and text box are locked for the duration of the call and
        /// always released in the finally block; any failure is reported to the
        /// user in a message box instead of being propagated.
        /// </remarks>
        private void UpdateDataGrid()
        {
            try
            {
                btnConnect.Enabled = false;
                txtUrl.ReadOnly = true;
                proxy.Url = txtUrl.Text;
                ds = proxy.GetForeignExchangeRatesDataSet();
                dataGrid1.SetDataBinding( ds, "ForeignExchange" );
                dataGrid1.Update();
            }
            catch( Exception err )
            {
                MessageBox.Show( err.Message );
            }
            finally
            {
                txtUrl.ReadOnly = false;
                btnConnect.Enabled = true;
            }
        }

        /// <summary>
        /// Application entry point: shows the main form.
        /// </summary>
        [STAThread]
        static void Main( string[] args )
        {
            Application.Run( new frmMainBankRates() );
        }
    }
}

有了這個例子,應該可以從中瞭解最基本的XML, WebService, Regular Expression, DataSet, DataGrid的知識。

軟體所有代碼,及相關截屏可以從下面的連結中獲得:

http://www.dancefires.com/ChinaBank/





相關文章

E-Commerce Solutions

Leverage the same tools powering the Alibaba Ecosystem

Learn more >

Apsara Conference 2019

The Rise of Data Intelligence, September 25th - 27th, Hangzhou, China

Learn more >

Alibaba Cloud Free Trial

Learn and experience the power of Alibaba Cloud with a free trial worth $300-1200 USD

Learn more >

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。