/// <summary>
/// Demo entry point: extracts the content of the <c>class='ad'</c> div from a
/// sample HTML string via <c>FilterByDiv</c>, writes it to the console and
/// then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Look-behind for the opening "<div class='ad'...>" tag, a greedy capture of
    // any characters (including newlines), then a look-ahead for "</div>".
    string adDivPattern = @"(?<=<div class='ad'.*>)([\s\S]*)(?=</div>)";

    // Sample markup: a nested div block followed by a table outside of it.
    string sampleHtml = @"<div class='test'></div><div class='ad'>
<div>aaaaa</div>
<div>bbbbb</div>
<div>cccccc</div><table><tr><td>Div Table Contest</td></tr></table>
</div><table><tr><td>outer Content</td></tr></table>";

    string extracted = FilterByDiv(sampleHtml, adDivPattern);
    Console.Write(extracted);

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Applies <paramref name="regex"/> to <paramref name="sHtml"/> (case-insensitively)
/// and returns the text of the first match with leading and trailing whitespace removed.
/// </summary>
/// <param name="sHtml">The HTML text to search.</param>
/// <param name="regex">The regular expression pattern to apply.</param>
/// <returns>The trimmed text of the first match; an empty string when nothing matches.</returns>
public static string FilterByDiv(string sHtml, string regex)
{
    System.Text.RegularExpressions.Match firstMatch =
        System.Text.RegularExpressions.Regex.Match(
            sHtml,
            regex,
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    string matchedText = firstMatch.Value;
    return matchedText.Trim();
}
运行结果:
<div>aaaaa</div>
<div>bbbbb</div>
<div>cccccc</div><table><tr><td>Div Table Contest</td></tr></table>
// Capture, in the named group "g1", whatever lies between the opening tag and
// the closing tag. The ".*" is greedy, and because no RegexOptions are passed
// the "." does not match a newline.
string beginstr = "<div class='ad'>";
string endstr = "</div>";
string g1Pattern = string.Format(@"{0}(?<g1>.*){1}", beginstr, endstr);
Regex rg = new Regex(g1Pattern);

// Run the match once and inspect its Success flag.
Match g1Match = rg.Match(input);
if (g1Match.Success)
{
    return g1Match.Groups["g1"].Value;
}
使用贪婪模式是不准确的:贪婪匹配会一直延伸到最后一个 </div>,对于类似下面这种源字符串,得到的结果就是错误的
aaaaa
<div class='ad'>
<div>dfdferhgthghg</div>
<div>tgtgfbvf</div>
<div>rtrggf</div>
...................div个数不定
</div>
bbbbb
<div>test</div>
这种 div 嵌套层数不定的需求,还是要用平衡组(balancing group)来匹配
// Balancing-group pattern for a <div class='ad'> element with nested divs:
//   (?is)                      - ignore case; "." also matches newlines
//   <div\s+class='ad'[^>]*>    - the opening tag of the target div
//   (?> ... )*                 - atomic loop over the element's content:
//       <div[^>]*>(?<o>)       -   a nested opening div pushes onto group "o"
//       </div>(?<-o>)          -   a closing div pops from group "o"
//       (?:(?!</?div\b).)*     -   any run of text that does not start a div tag
//   (?(o)(?!))                 - fail if some nested div is still unclosed
//   </div>                     - the closing tag of the target div
string balancedDivPattern = @"(?is)<div\s+class='ad'[^>]*>(?><div[^>]*>(?<o>)|</div>(?<-o>)|(?:(?!</?div\b).)*)*(?(o)(?!))</div>";
Regex reg = new Regex(balancedDivPattern);

// Append every matched block to the text box, one per line.
foreach (Match divMatch in reg.Matches(yourStr))
{
    string line = divMatch.Value + "\n";
    richTextBox2.Text += line;
}