这方面的资料很多,你可以搜索一下“模拟登录”,例如下面几个链接:
http://www.cnblogs.com/anjou/archive/2006/12/25/602943.html
http://www.cnblogs.com/soonfly/archive/2008/09/12/1289759.html
http://download.csdn.net/source/1227613
/// <summary>
/// Logs in to the site with a form POST, then POSTs <paramref name="postData"/> to
/// <paramref name="url"/> using the cookies collected during the login request.
/// </summary>
/// <param name="url">Address that receives the second (authenticated) POST.</param>
/// <param name="postData">
/// Request body; sent as UTF-8 bytes with Content-Type application/x-www-form-urlencoded.
/// </param>
/// <returns>
/// The response body decoded as UTF-8, or <see cref="String.Empty"/> if any step throws.
/// </returns>
public String Post(string url, string postData)
{
    try
    {
        #region Login
        string loginurl = "http://10.16.230.26:8080/login.aspx?ReturnUrl=%2flogout.aspx";
        byte[] loginByteArray = Encoding.UTF8.GetBytes(GetLoginPostData());

        // One container shared by both requests: the login response fills it,
        // and the second request sends the stored cookies back.
        CookieContainer myCookieContainer = new CookieContainer();

        HttpWebRequest loginRequest = (HttpWebRequest)WebRequest.Create(loginurl);
        loginRequest.ContentType = "application/x-www-form-urlencoded";
        loginRequest.ContentLength = loginByteArray.Length;
        loginRequest.Method = "POST";
        loginRequest.CookieContainer = myCookieContainer;

        using (Stream loginRequestStream = loginRequest.GetRequestStream())
        {
            loginRequestStream.Write(loginByteArray, 0, loginByteArray.Length);
        }

        // The login page body is not needed. Disposing the response releases the
        // connection; the cookies have already been stored in myCookieContainer.
        using (loginRequest.GetResponse())
        {
        }
        #endregion

        byte[] byteArray = Encoding.UTF8.GetBytes(postData);

        HttpWebRequest dataRequest = (HttpWebRequest)WebRequest.Create(url);
        dataRequest.Headers.Add("Accept-Language: zh-cn");
        dataRequest.Headers.Add("UA-CPU: x86");
        // Let the framework send Accept-Encoding and transparently decompress the
        // response. Adding the header by hand without this would hand compressed
        // bytes to the UTF-8 reader below.
        dataRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        dataRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Embedded Web Browser from: http://bsalsa.com/; InfoPath.2; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
        dataRequest.ContentType = "application/x-www-form-urlencoded";
        dataRequest.ContentLength = byteArray.Length;
        dataRequest.Method = "POST";
        dataRequest.CookieContainer = myCookieContainer;

        using (Stream dataRequestStream = dataRequest.GetRequestStream())
        {
            dataRequestStream.Write(byteArray, 0, byteArray.Length);
        }

        using (HttpWebResponse dataResponse = (HttpWebResponse)dataRequest.GetResponse())
        using (Stream dataResponseStream = dataResponse.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataResponseStream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Callers rely on an empty string meaning "failed", so keep that contract,
        // but record the cause instead of discarding it silently.
        System.Diagnostics.Trace.TraceError("Post to {0} failed: {1}", url, ex);
        return String.Empty;
    }
}
如果网站没有验证码,事情就好办了:自己注册一个账号,再用这个账号模拟登录即可。模拟登录的方法可以参考楼上 zzjj 的代码。
如果有验证码的话就会麻烦一点,你可以google一下如何破解验证码,不过有些比较复杂的验证码还是比较难破解的
还有一种办法是通过 WebBrowser 控件来爬取,速度会慢一些,但如果只爬取某个特定的网站,这样已经够用而且更保险——有些网站发现你访问过于频繁时会自动封禁你。
通过WebBrowser你就可以自己先登录好,然后让它自己挂机爬就行了。