C# 網頁資料表格抓取資料

來源:互聯網
上載者:User

標籤:blog   window   raw   bsp   class   ase   pat   name   字串   

 

 

主要方法:

/// <summary>
/// Downloads the page whose address is typed into <c>txtUrl</c> and extracts
/// the cell text of every table row found in it.
/// </summary>
/// <returns>
/// One string per <c>&lt;tr&gt;...&lt;/tr&gt;</c> match, in document order. Each string
/// is the concatenation of the row's matched cell values, every value followed
/// by a single space.
/// </returns>
public List<string> datasearch()
{
    List<string> list = new List<string>();

    string url = txtUrl.Text.Trim();

    // Only "UTF-8" selects UTF-8; any other selection (or none at all) uses
    // the system default code page, as the original switch did.
    object selectedCode = cboCode.SelectedItem;
    Encoding encoding = (selectedCode != null && selectedCode.ToString() == "UTF-8")
        ? Encoding.UTF8
        : Encoding.Default;

    // Download the whole page. The using blocks release the connection and
    // the reader even when reading throws.
    string str;
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
    {
        str = reader.ReadToEnd();
    }

    // Row pattern: the text between a literal "<tr>" (no attributes) and the next "</tr>".
    Regex regexR = new Regex(@"(?<=<tr>)([\s\S]*?)(?=</tr>)");
    // Cell pattern: a run of non-whitespace characters between "<td ...>" and "</td>".
    // Built once here instead of once per row.
    Regex regexD = new Regex(@"(?<=<td[^>]*>[\s]*?)([\S]*)(?=[\s]*?</td>)");

    foreach (Match mr in regexR.Matches(str))
    {
        StringBuilder row = new StringBuilder();
        foreach (Match md in regexD.Matches(mr.Value))
        {
            row.Append(md.Value).Append(' ');
        }
        list.Add(row.ToString());
    }

    return list;
}

Form1.cs 檔案

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using System.IO;
using System.Net;

namespace Demo
{
/// <summary>
/// Main window of the table-scraping demo: downloads a web page, extracts the
/// text of its table rows and lets the user save the result to a text file.
/// </summary>
public partial class Form1 : Form
{
    // Row pattern: the text between a literal "<tr>" (no attributes) and the next "</tr>".
    private static readonly Regex RowRegex = new Regex(@"(?<=<tr>)([\s\S]*?)(?=</tr>)");

    // Cell pattern: a run of non-whitespace characters between "<td ...>" and "</td>".
    private static readonly Regex CellRegex = new Regex(@"(?<=<td[^>]*>[\s]*?)([\S]*)(?=[\s]*?</td>)");

    /// <summary>
    /// Builds the form and pre-selects the first entry of the encoding combo box.
    /// </summary>
    public Form1()
    {
        InitializeComponent();
        cboCode.SelectedIndex = 0;
    }

    /// <summary>
    /// Downloads the page whose address is typed into <c>txtUrl</c> and extracts
    /// the cell text of every table row found in it.
    /// </summary>
    /// <returns>
    /// One string per <c>&lt;tr&gt;...&lt;/tr&gt;</c> match, in document order. Each string
    /// is the concatenation of the row's matched cell values, every value
    /// followed by a single space.
    /// </returns>
    public List<string> datasearch()
    {
        string url = txtUrl.Text.Trim();
        Encoding encoding = ResolveEncoding(cboCode.SelectedItem);
        string html = DownloadPage(url, encoding);
        return ParseTableRows(html);
    }

    /// <summary>
    /// Maps the combo box selection to a text encoding: "UTF-8" selects UTF-8,
    /// anything else (including no selection) selects the system default.
    /// </summary>
    private static Encoding ResolveEncoding(object selectedItem)
    {
        if (selectedItem != null && selectedItem.ToString() == "UTF-8")
        {
            return Encoding.UTF8;
        }
        return Encoding.Default;
    }

    /// <summary>
    /// Fetches <paramref name="url"/> and returns the full response body decoded
    /// with <paramref name="encoding"/>.
    /// </summary>
    private static string DownloadPage(string url, Encoding encoding)
    {
        WebRequest request = WebRequest.Create(url);
        // The using blocks release the connection and the reader even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Extracts one string per table row from <paramref name="html"/>; each
    /// matched cell value is followed by a single space.
    /// </summary>
    private static List<string> ParseTableRows(string html)
    {
        List<string> rows = new List<string>();
        foreach (Match rowMatch in RowRegex.Matches(html))
        {
            StringBuilder row = new StringBuilder();
            foreach (Match cellMatch in CellRegex.Matches(rowMatch.Value))
            {
                row.Append(cellMatch.Value).Append(' ');
            }
            rows.Add(row.ToString());
        }
        return rows;
    }

    /// <summary>
    /// Runs the scrape and shows the rows in <c>txtResult</c>, one row per line.
    /// </summary>
    private void btnGet_Click(object sender, EventArgs e)
    {
        List<string> list = datasearch();

        // AppendLine terminates every row (including the last) with
        // Environment.NewLine, independent of this source file's line endings.
        StringBuilder text = new StringBuilder();
        foreach (string row in list)
        {
            text.AppendLine(row);
        }
        txtResult.Text = text.ToString();
    }

    /// <summary>
    /// Writes the result text to the path in <c>txtSaveUrl</c>, replacing any existing file.
    /// </summary>
    private void btnSave_Click(object sender, EventArgs e)
    {
        // Example path: C:\Users\Administrator\Desktop\Work File\網頁表格抓取\1.txt
        TXTHelper.TxtSaveByStr(txtSaveUrl.Text.Trim(), txtResult.Text);
        MessageBox.Show("儲存成功!");
    }

    /// <summary>
    /// Appends the result text to the file at the path in <c>txtSaveUrl</c>.
    /// </summary>
    private void btnAdd_Click(object sender, EventArgs e)
    {
        TXTHelper.TxtAddByStr(txtSaveUrl.Text.Trim(), txtResult.Text);
        MessageBox.Show("儲存添加成功!");
    }
}
}

TXTHelper.cs 檔案

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Collections.Specialized;

 

namespace Demo
{
/// <summary>
/// Small helpers for reading, writing and inspecting plain-text files.
/// </summary>
public class TXTHelper
{
    /// <summary>
    /// Reads a text file line by line.
    /// </summary>
    /// <param name="url">Path of the file to read; it must already exist.</param>
    /// <returns>The file's lines in order, without their line terminators.</returns>
    public static StringCollection Read_txt(string url)
    {
        StringCollection collection = new StringCollection();

        // The using blocks close the reader and the file even when reading throws.
        using (FileStream fs = new FileStream(url, FileMode.Open, FileAccess.Read))
        using (StreamReader reader = new StreamReader(fs))
        {
            string line;
            // ReadLine returns null at end of file.
            while ((line = reader.ReadLine()) != null)
            {
                collection.Add(line);
            }
        }

        return collection;
    }

    /// <summary>
    /// Writes the character "1" at the very start of the file, creating the
    /// file when it does not exist. Content after the first character is kept.
    /// </summary>
    /// <param name="url">Path of the file to modify.</param>
    public static void Update_txt(string url)
    {
        // OpenOrCreate does not truncate, so only the leading character is overwritten.
        using (FileStream fs = new FileStream(url, FileMode.OpenOrCreate, FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(fs))
        {
            sw.Write("1");
        }
    }

    /// <summary>
    /// Writes <paramref name="txtStr"/> to <paramref name="savePath"/>,
    /// replacing any existing file.
    /// </summary>
    /// <param name="savePath">Path of the file to create or overwrite.</param>
    /// <param name="txtStr">Text to write.</param>
    public static void TxtSaveByStr(string savePath, string txtStr)
    {
        // Disposing the writer flushes it and closes the underlying file.
        using (FileStream fs = new FileStream(savePath, FileMode.Create))
        using (StreamWriter sw = new StreamWriter(fs))
        {
            sw.Write(txtStr);
        }
    }

    /// <summary>
    /// Appends <paramref name="txtStr"/> to the end of <paramref name="savePath"/>,
    /// creating the file when it does not exist.
    /// </summary>
    /// <param name="savePath">Path of the file to append to.</param>
    /// <param name="txtStr">Text to append.</param>
    public static void TxtAddByStr(string savePath, string txtStr)
    {
        // Disposing the writer flushes it and closes the underlying file.
        using (FileStream fs = new FileStream(savePath, FileMode.Append))
        using (StreamWriter sw = new StreamWriter(fs))
        {
            sw.Write(txtStr);
        }
    }

    /// <summary>
    /// Returns the last-write time reported by <see cref="FileInfo"/> for the given path.
    /// </summary>
    /// <param name="url">Path of the file to inspect.</param>
    /// <returns>The value of <see cref="FileSystemInfo.LastWriteTime"/>.</returns>
    public static DateTime File_Info(string url)
    {
        return new FileInfo(url).LastWriteTime;
    }
}
}

 

運行結果圖:

C# 網頁資料表格抓取資料

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.