利用 HttpWebRequest 和 HttpWebResponse 取得页面内容:
// Downloads the page at `url` and leaves the decoded body text in `str`.
// `str` stays null when the request or the read fails.
string str = null;
// NOTE(review): the snippet originally declared an unused `rul` while reading an
// undeclared `url`; treated here as a typo. Assign the address of the page to fetch.
string url = "";
HttpWebRequest httpReq = (HttpWebRequest)WebRequest.Create(url);
httpReq.UserAgent = @"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.40607; .NET CLR 1.1.4322)";
try
{
    // The using blocks close the response and the reader even when reading throws,
    // so the underlying connection is always released.
    using (HttpWebResponse httpResponse = (HttpWebResponse)httpReq.GetResponse())
    using (StreamReader sr = new StreamReader(httpResponse.GetResponseStream(), Encoding.UTF8))
    {
        str = sr.ReadToEnd(); // the returned page content
    }
}
catch (Exception ex)
{
    // Log the exception here; the failure is deliberately not rethrown.
}
上面的 str 就是取得的页面内容,再通过正则表达式把 <title></title> 部分取出来就可以了。
一般的做法是:先取到这个页面的 html,然后通过正则表达式截取自己需要的内容。
一、可以考虑用Response.Filter替换,直接截取Request,并替换,参考:一个页面标题和过滤输出的解决方案
http://dotnet.chinaitlab.com/ASPNET/778889.html
二、这里有个以前用过的方法:逐行读取,取到title即终止
#region Fetch the required field content
/// <summary>
/// Reads a remote page line by line and returns the text from the start of the
/// response through the line that contains the closing <c>&lt;/title&gt;</c> tag.
/// If no such tag is found, the whole response is returned.
/// </summary>
/// <param name="strUrl">Address of the remote page to read.</param>
/// <param name="timeout">Request timeout in milliseconds; 8000 is a typical value.</param>
/// <param name="enterType">
/// Line-break handling: 1 appends "\r\n" after every line that does not end the scan;
/// any other value joins the lines without a separator.
/// </param>
/// <param name="EnCodeType">Encoding used to decode the response stream.</param>
/// <returns>
/// The collected text, or an empty string when the status code is not 200 OK
/// or when any exception occurs.
/// </returns>
public static string GetRequestString(string strUrl, int timeout, int enterType, Encoding EnCodeType)
{
    const string titleEndTag = "</title>";
    try
    {
        HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(strUrl);
        myReq.Timeout = timeout;

        // Disposing the response releases the connection on every path,
        // including the non-OK return and a failure while reading.
        using (HttpWebResponse httpResp = (HttpWebResponse)myReq.GetResponse())
        {
            if (httpResp.StatusCode != System.Net.HttpStatusCode.OK)
            {
                return string.Empty;
            }

            using (StreamReader sr = new StreamReader(httpResp.GetResponseStream(), EnCodeType))
            {
                StringBuilder strBuilder = new StringBuilder();
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Only the newly appended text (plus enough preceding characters to
                    // catch a tag split across two reads) needs scanning; everything
                    // before it was already checked on earlier iterations.
                    int searchStart = Math.Max(0, strBuilder.Length - (titleEndTag.Length - 1));
                    strBuilder.Append(line);
                    string tail = strBuilder.ToString(searchStart, strBuilder.Length - searchStart);

                    // Stop once the closing title tag has been read; HTML tag names are
                    // case-insensitive, so </TITLE> must match as well.
                    if (tail.IndexOf(titleEndTag, StringComparison.OrdinalIgnoreCase) >= 0) { break; }
                    if (enterType == 1) { strBuilder.Append("\r\n"); }
                }
                return strBuilder.ToString();
            }
        }
    }
    catch (Exception)
    {
        // Best effort: any failure (network, timeout, decoding) yields an empty result.
        // Log handling was removed here (Tony, 2008.11.21).
        return string.Empty;
    }
}
#endregion
返回的是从页面开头到 </title> 之间的部分。
用 HEAD 请求