using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Xml;

namespace WebApplication19
{
    /// <summary>
    /// Selects which kind of table cell <c>GET_TH_OR_TD_LIST</c> extracts from a row.
    /// The member name is converted with <c>ToString()</c> and used verbatim as the
    /// HTML tag name inside the regex, so the names must stay lowercase tag names.
    /// </summary>
    public enum SearchRange
    {
        th = 0,
        td = 1
    }

    /// <summary>
    /// Code-behind page that downloads a Bank of China search-result page on every
    /// load and splits its first table into header cells and data cells using regexes.
    /// </summary>
    public partial class WebForm1 : System.Web.UI.Page
    {
        /// <summary>
        /// Value returned by <see cref="getHtml"/>. It is not assigned anywhere in this
        /// part of the partial class.
        /// </summary>
        public string MKT;

        /// <summary>
        /// Downloads the result page, narrows it down div -> table -> rows, and parses
        /// the first row as header cells and the remaining rows as data cells.
        /// </summary>
        /// <returns>
        /// The current value of <see cref="MKT"/>. The parsed cells are currently not
        /// stored anywhere; they are only computed.
        /// </returns>
        /// <remarks>
        /// Best-effort: any failure (network, I/O, regex) is written to the diagnostic
        /// trace instead of being thrown, so the page still renders.
        /// </remarks>
        private string getHtml()
        {
            List<string> trList = new List<string>();
            try
            {
                // WebClient is IDisposable; dispose it together with the response stream.
                using (WebClient wc = new WebClient())
                using (Stream stream = wc.OpenRead("http://srh.bankofchina.com/search/whpj/search.jsp?erectDate=2001-11-01&nothing=2016-11-04&pjname=1316&page=4"))
                {
                    string content;
                    using (StreamReader sr = new StreamReader(stream, Encoding.UTF8))
                    {
                        content = sr.ReadToEnd();
                    }

                    // Extract the content of the first <div>.
                    // NOTE(review): the tag literals in these patterns were reconstructed
                    // from the surrounding comments and variable names; confirm the div
                    // selector against the target page (it may need a class attribute).
                    string divPatern = @"(?<=<div[^>]*?>)([\s\S]*?)(?=</div>)";
                    string divContent = FirstMatchOrEmpty(content, divPatern);

                    // Extract the content of the first <table> inside that div.
                    string tablePatern = @"(?<=<table[^>]*?>)([\s\S]*?)(?=</table>)";
                    string tableContent = FirstMatchOrEmpty(divContent, tablePatern);

                    // Extract the inner content of every <tr> row.
                    string trPatern = @"(?<=<tr[^>]*?>)([\s\S]*?)(?=</tr>)";
                    foreach (Match match in Regex.Matches(tableContent, trPatern))
                    {
                        trList.Add(match.Value);
                    }

                    // trList[0] is the header row; every later row is a content row.
                    // SearchRange tells the helper whether to look for header cells
                    // or content cells. Guard against a page without any rows so an
                    // empty result is not reported as an indexing failure.
                    if (trList.Count > 0)
                    {
                        List<string> thList = GET_TH_OR_TD_LIST(SearchRange.th, trList[0]);
                        List<List<string>> tdsList = new List<List<string>>();
                        for (int i = 1; i < trList.Count; i++)
                        {
                            tdsList.Add(GET_TH_OR_TD_LIST(SearchRange.td, trList[i]));
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Keep the page alive, but do not hide the failure completely.
                System.Diagnostics.Trace.TraceError("getHtml failed: " + ex);
            }

            return MKT;
        }

        /// <summary>
        /// Returns the text of the first match of <paramref name="pattern"/> in
        /// <paramref name="input"/>, or an empty string when there is no match.
        /// </summary>
        private static string FirstMatchOrEmpty(string input, string pattern)
        {
            Match match = Regex.Match(input, pattern);
            return match.Success ? match.Value : string.Empty;
        }

        /// <summary>
        /// Extracts the inner content of every header or data cell in one table row.
        /// </summary>
        /// <param name="range">Cell kind to look for; its name is used as the tag name.</param>
        /// <param name="row">Inner HTML of a single table row.</param>
        /// <returns>Cell contents in document order; empty when the row has no such cells.</returns>
        private List<string> GET_TH_OR_TD_LIST(SearchRange range, string row)
        {
            string tmp = range.ToString();

            // Look-behind for the opening tag and look-ahead for the closing tag, so
            // only the cell content is captured in the named group "tdCell".
            string tdPatern = $@"(?<=(<{tmp}[^>]*?>))(?<tdCell>[\s\S]*?)(?=</{tmp}>)";

            List<string> contentList = new List<string>();
            foreach (Match match in Regex.Matches(row, tdPatern))
            {
                contentList.Add(match.Groups["tdCell"].Value);
            }

            return contentList;
        }

        /// <summary>Runs the download-and-parse step on every page load.</summary>
        protected void Page_Load(object sender, EventArgs e)
        {
            getHtml();
        }
    }
}