C#讀取HTML檔案內容寫入記事本

來源:互聯網
上載者:User
C#讀取HTML檔案內容寫入記事本


// Converts every .htm file in the directory entered in textBox1 to plain text.
// For each file the alarm cause (content of the first <h4>) and the remaining
// body text (HTML tags, the first <ul><li>...</ul> list and the <title> element
// removed) are appended to "警示經驗庫資訊.txt" in that same directory.
// Any failure is reported to the user through a message box.
try
{
    string inputPath = this.textBox1.Text.Trim();
    if (inputPath.Length == 0)
    {
        MessageBox.Show("請輸入HTML檔案路徑!");
    }
    else
    {
        // Make sure the directory path ends with a backslash so that file
        // names can simply be appended to it.
        string dirPath = inputPath.EndsWith("\\", StringComparison.Ordinal)
            ? inputPath
            : inputPath + "\\";

        FileInfo[] files = new DirectoryInfo(dirPath).GetFiles();
        string filename = dirPath + "警示經驗庫資訊.txt";

        // Loop-invariant objects are created once instead of once per file.
        Encoding encode = Encoding.GetEncoding("gb2312");
        Regex tagRegex = new Regex(@"<[^>]+>|</[^>]+>");                           // any HTML tag
        Regex listRegex = new Regex("<ul><li>(?<key>.*?)</ul>", RegexOptions.None); // first <ul><li>...</ul>
        // Line breaks and tabs only. The previous pattern "[\n|\r|\t]" also
        // matched a literal '|' because '|' is not alternation inside [...].
        const string lineBreakPattern = "[\n\r\t]";
        const RegexOptions matchOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        int totalFile = 0;

        // File.AppendText creates the file when it does not exist, so no
        // File.Exists check is needed. The using block guarantees the writer
        // is closed even when processing a file throws.
        using (StreamWriter sw = File.AppendText(filename))
        {
            foreach (FileInfo fileinfo in files)
            {
                // Only .htm files are processed; compare case-insensitively
                // so that ".HTM" is accepted as well.
                if (!fileinfo.Extension.Equals(".htm", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                totalFile++;

                // Read the file directly instead of going through WebRequest,
                // which treats the path as a URI and mishandles characters
                // such as '#' or '%' in file names.
                string strhtml = File.ReadAllText(fileinfo.FullName, encode);

                // Alarm cause: text between <h4> and </h4>. Taking the capture
                // group avoids the Substring arithmetic that threw when the
                // file contained no <h4> element (the cause is then empty).
                Match headingMatch = Regex.Match(strhtml, "<h4>([^<]*)</h4>", matchOptions);
                string cause = Regex.Replace(headingMatch.Groups[1].Value, lineBreakPattern, " ");

                // Whole <title>...</title> element, normalised the same way as
                // the page source below so that it can be removed from it.
                Match titleMatch = Regex.Match(strhtml, "<title>([^<]*)</title>", matchOptions);
                string titlename = Regex.Replace(titleMatch.Value, lineBreakPattern, "").Trim();

                // Collapse the page onto one line so the lazy ".*?" of
                // listRegex can span what used to be several lines.
                strhtml = Regex.Replace(strhtml, lineBreakPattern, "");

                string dataStr = "<ul><li>" + listRegex.Match(strhtml).Groups["key"].Value + "</ul>";
                strhtml = strhtml.Replace(dataStr, "");

                // string.Replace throws ArgumentException for an empty
                // oldValue, which happened for pages without a <title>.
                if (titlename.Length > 0)
                {
                    strhtml = strhtml.Replace(titlename, "");
                }

                strhtml = tagRegex.Replace(strhtml, " ");   // strip remaining HTML tags
                strhtml = strhtml.Replace("&nbsp;", "");    // drop non-breaking-space entities

                // The heading looks like "ALM-1 <name>"; keep only the last
                // space-separated token.
                string[] causeParts = cause.Split(' ');
                string zhCause = this.chinaString(causeParts[causeParts.Length - 1]);

                sw.WriteLine("第" + totalFile + "個檔案:" + fileinfo.Name);
                sw.WriteLine("-----警示原因------:");
                sw.WriteLine(zhCause);
                sw.WriteLine("-----處理經驗------:");
                sw.WriteLine(strhtml);
                sw.WriteLine();
                // Flush per file so finished entries survive a later failure.
                sw.Flush();
            }
        }

        MessageBox.Show("操作成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
}
catch (Exception ee)
{
    MessageBox.Show("操作失敗:" + ee.Message);
}

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.