C#正则表达式:
Regex regex = new Regex(@"<div>((?<!d)d(?!d)|[^d])+sss((?<!d)d(?!d)|[^d])+</div>");
谢谢!
dudu用的是贪婪模式，所以会跨越多个div进行匹配。你可以试试平衡组：先分离出所有的div，再逐个检查是否含有sss且不含有ddd。
示例如下:
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace Demo201109
{
    /// <summary>
    /// Demo program: splits an HTML fragment into its outermost div elements
    /// with a balancing-group regular expression, then prints only the divs
    /// that contain "sss" and do not contain "ddd".
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Entry point. Runs <see cref="SplitString"/> on a fixed sample and
        /// waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            string s = "<div><p>sss</p><p>ddd</p></div><div><p>222</p><p>sss</p></div><div><p>mmm</p><p>sss</p></div>";

            // Balancing-group pattern that matches one complete div, including nested divs:
            //   (?is)                    - ignore case; '.' also matches newlines
            //   <div[^>]*>               - the opening tag of the outer div
            //   (?> ... )*               - atomic group, repeated:
            //     <div[^>]*>(?<o>)       -   nested opening tag: push onto stack "o"
            //     </div>(?<-o>)          -   nested closing tag: pop from stack "o"
            //     (?:(?!</?div\b).)*     -   any text that is not the start of a div tag
            //   (?(o)(?!))               - fail if stack "o" is not empty (unbalanced tags)
            //   </div>                   - the closing tag of the outer div
            string r = @"(?is)<div[^>]*>(?><div[^>]*>(?<o>)|</div>(?<-o>)|(?:(?!</?div\b).)*)*(?(o)(?!))</div>";

            SplitString(r, s);
            Console.ReadKey();
        }

        /// <summary>
        /// Finds every match of <paramref name="regPattern"/> in
        /// <paramref name="SrcString"/> and writes to the console, one per line,
        /// each match that contains "sss" and does not contain "ddd".
        /// </summary>
        /// <param name="regPattern">Regular expression used to split the input into candidate blocks.</param>
        /// <param name="SrcString">Text to search.</param>
        /// <remarks>Does nothing when either argument is null or empty.</remarks>
        private static void SplitString(string regPattern, string SrcString)
        {
            if (string.IsNullOrEmpty(regPattern) || string.IsNullOrEmpty(SrcString))
            {
                return;
            }

            Regex rgx = new Regex(regPattern, RegexOptions.Compiled);

            // Iterating an empty MatchCollection is a no-op, so no Count check is needed.
            foreach (Match match in rgx.Matches(SrcString))
            {
                string v = match.Value;

                // string.Contains is an ordinal comparison and also accepts a hit at
                // index 0; the previous "IndexOf(...) > 0" test was culture-sensitive
                // and silently rejected matches that begin with "sss".
                if (v.Contains("sss") && !v.Contains("ddd"))
                {
                    Console.WriteLine(v);
                }
            }
        }
    }
}
结果:
// <div><p>222</p><p>sss</p></div>
// <div><p>mmm</p><p>sss</p></div>
同样谢谢!