C# 使用正则表达式来获取数据
打个比方:要从下面这段 HTML 中取出 aa、bb、cc 三个值:
<li ><a href="aa" ><strong>bb</strong></a><span class="writing">cc</span></li>
原始数据:
<li ><a href="http://news.naver.com/main/read.nhnmode=LSD&mid=shm&sid1=102&oid=003&aid=0003428615" ><strong>bb</strong></a><span class="writing">cc</span></li>
在这个数据当中我要获取
1.http://news.naver.com/main/read.nhnmode=LSD&mid=shm&sid1=102&oid=003&aid=0003428615
2.bb
3.cc
希望各位能帮帮小弟!!!
// Sample input: one <li> item holding a link, a <strong> title and a <span> text.
string input = "<li ><a href=\"http://news.naver.com/main/read.nhnmode=LSD&mid=shm&sid1=102&oid=003&aid=0003428615\" ><strong>bb</strong></a><span class=\"writing\">cc</span></li>";
// Group 1 = href value, group 2 = <strong> text, group 3 = <span> text.
// Every quantifier is lazy so that several <li> items on one line are matched
// one by one instead of being swallowed into a single greedy match.
Regex re = new Regex(@"<li\s*?>.*?href=['""](.*?)['""].*?<strong.*?>(.*?)</strong>\s*?</a>.*?<span.*?>(.*?)</span>.*?</li>", RegexOptions.None);
// Regex.Replace(input, replacement): the input string is the first argument;
// each matched <li> item is replaced by "href strong-text span-text".
string result = re.Replace(input, "$1 $2 $3");
HTML这些标签是死的吧?
<li ><a href=".+?" ><strong>.+?</strong></a><span class="writing">.+?</span></li>
你做的是web吧! 你可以通过RegularExpressionValidator控件来控制的啊
using System;
using System.Text;
using System.Text.RegularExpressions;
// Demo entry point: runs one sample <li> snippet through SplitHtmlString with a
// pattern that matches any single HTML tag, then waits for Enter so the console
// window stays open.
public static void Main(string[] args)
{
    const string tagPattern = @"<[^>]+>";
    const string sampleHtml = "<li><a href=\"http://news.naver.com/main/read.nhnmode=LSD&mid=shm&sid1=102&oid=003&aid=0003428615\"><strong>bb</strong></a><span class=\"writing\">cc</span></li>";

    SplitHtmlString(sampleHtml, tagPattern);
    Console.ReadLine();
}
/// <summary>
/// Splits <paramref name="html"/> on every match of <paramref name="regex"/> and writes
/// three lines to the console: the first double-quoted value of the second match
/// (extracted by GetPureURL3), then the fourth and the seventh text segment found
/// between matches.
/// </summary>
/// <param name="html">
/// Markup to process. The fixed indices used below fit input shaped like
/// &lt;li&gt;&lt;a href="..."&gt;&lt;strong&gt;text&lt;/strong&gt;&lt;/a&gt;&lt;span&gt;text&lt;/span&gt;&lt;/li&gt;.
/// </param>
/// <param name="regex">Pattern used as the separator, e.g. one that matches a single tag.</param>
/// <exception cref="ArgumentException">
/// <paramref name="html"/> yields fewer than two matches or fewer than seven segments.
/// </exception>
static void SplitHtmlString(string html, string regex)
{
    // Positions that the expected <li><a><strong>..</strong></a><span>..</span></li> layout produces.
    const int linkTagIndex = 1;        // second match: the <a href="..."> tag
    const int titleSegmentIndex = 3;   // text between <strong> and </strong>
    const int writingSegmentIndex = 6; // text between <span ...> and </span>

    // One Regex instance serves both the split and the match pass.
    Regex tagRegex = new Regex(regex, RegexOptions.IgnoreCase);
    string[] segments = tagRegex.Split(html);
    MatchCollection tags = tagRegex.Matches(html);

    // Fail with a descriptive message instead of an opaque index error when the
    // markup does not have the expected shape.
    if (tags.Count <= linkTagIndex || segments.Length <= writingSegmentIndex)
    {
        throw new ArgumentException(
            "Expected at least 2 pattern matches and 7 text segments, but found "
            + tags.Count + " match(es) and " + segments.Length + " segment(s).",
            "html");
    }

    // The matched tag looks like <a href="...">; keep only the quoted value.
    string url = GetPureURL3(tags[linkTagIndex].Value);

    StringBuilder output = new StringBuilder();
    output.AppendLine(url);
    output.AppendLine(segments[titleSegmentIndex]);
    output.AppendLine(segments[writingSegmentIndex]);
    Console.Write(output.ToString());
}
// Example input: <a href="http://news.naver.com/main/read.nhnmode=LSD&mid=shm&sid1=102&oid=003&aid=0003428615"/>
/// <summary>
/// Returns the text between the first and the second double quote of
/// <paramref name="sHtml"/>, which for a tag such as &lt;a href="..."&gt; is the href value.
/// </summary>
/// <param name="sHtml">Tag text to inspect; may be null.</param>
/// <returns>
/// The first double-quoted value, or <paramref name="sHtml"/> unchanged when it is
/// null or contains no double quote.
/// </returns>
static string GetPureURL3(string sHtml)
{
    // Explicit checks replace the former catch-all handler, so no exception is
    // used for ordinary control flow; the results are the same.
    if (sHtml == null)
    {
        return sHtml;
    }

    // Splitting on '"' puts the first quoted value at index 1.
    string[] parts = sHtml.Split('"');
    if (parts.Length < 2)
    {
        return sHtml;
    }

    return parts[1];
}