我要爬取某网站的源码,但是它禁止了我的IP访问,我现在要用代理服务器来访问它的网站抓取源码!
那网站的这个页面使用了POST提交数据:我的代码如下:高手们帮帮小弟啊,在线等【急急急】不搞定,明天上班挨批的啊!小弟感激不尽啊!
/// <summary>
/// Sends <paramref name="postDataString"/> as an
/// <c>application/x-www-form-urlencoded</c> HTTP POST to
/// <paramref name="urlString"/> and returns the response body as text.
/// </summary>
/// <param name="urlString">Absolute HTTP/HTTPS URL to post to; it is also sent as the Referer header.</param>
/// <param name="encoding">Encoding used both to serialise the POST body and to decode the response.</param>
/// <param name="postDataString">Already URL-encoded form data, e.g. <c>a=1&amp;b=2</c>.</param>
/// <returns>The full response body decoded with <paramref name="encoding"/>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="encoding"/> or <paramref name="postDataString"/> is null.
/// </exception>
/// <exception cref="Exception">
/// Creating the request, sending the body, or reading the response failed;
/// the underlying error is available through <see cref="Exception.InnerException"/>.
/// </exception>
public static string GetHtmlFromPost1(string urlString, Encoding encoding, string postDataString)
{
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    if (postDataString == null)
    {
        throw new ArgumentNullException("postDataString");
    }

    // Process-wide switch: stops the "Expect: 100-continue" header from being
    // added to POST requests. Note that it affects every later request too.
    System.Net.ServicePointManager.Expect100Continue = false;

    byte[] postDataByte = encoding.GetBytes(postDataString);

    // Create the request.
    HttpWebRequest httpWebRequest;
    try
    {
        // A direct cast (rather than "as") makes a non-HTTP URL such as
        // file:// or ftp:// fail right here with the wrapped error below,
        // instead of a NullReferenceException on the first property access.
        httpWebRequest = (HttpWebRequest)WebRequest.Create(urlString);
    }
    catch (Exception ex)
    {
        throw new Exception("建立页面请求时发生错误!", ex);
    }

    // Configure the request.
    httpWebRequest.Method = "POST";
    httpWebRequest.UseDefaultCredentials = true;
    // Uses the machine's default proxy settings. To route through a specific
    // proxy server, assign e.g. new WebProxy("host", port) here instead.
    // NOTE(review): WebProxy.GetDefaultProxy() is marked obsolete; it is kept
    // so that the proxy that gets selected does not change.
    httpWebRequest.Proxy = WebProxy.GetDefaultProxy();
    httpWebRequest.KeepAlive = false;
    httpWebRequest.ContentType = "application/x-www-form-urlencoded";
    httpWebRequest.Referer = urlString;
    httpWebRequest.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
    httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
    // A container must be attached for cookies to be kept across redirects
    // within this single call; it is not shared with any other request.
    httpWebRequest.CookieContainer = new CookieContainer();
    httpWebRequest.ContentLength = postDataByte.Length;

    // Send the POST body.
    try
    {
        // "using" only disposes a stream that was actually obtained, so a
        // failure in GetRequestStream() is reported as itself.
        using (Stream requestStream = httpWebRequest.GetRequestStream())
        {
            requestStream.Write(postDataByte, 0, postDataByte.Length);
        }
    }
    catch (Exception ex)
    {
        throw new Exception("发送POST数据时发生错误!", ex);
    }

    // Read the response; the response, its stream and the reader are all
    // released even when reading fails.
    try
    {
        using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
        using (Stream responseStream = httpWebResponse.GetResponseStream())
        using (StreamReader streamReader = new StreamReader(responseStream, encoding))
        {
            return streamReader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        throw new Exception("接受服务器返回页面时发生错误!", ex);
    }
}
可以试着拨号
http://www.codeproject.com/KB/aspnet/webdialup.aspx
// WebProxy(string) expects "host:port"; joining ip and port without the
// ':' separator would produce an invalid address such as "59.36.98.15480".
// NOTE(review): assumes ip is the bare host and port the bare port number - confirm.
httpWebRequest.Proxy = new WebProxy(ip + ":" + port);
这样不就行了么?你想问什么啊?
postdate 是请求地址需要的参数,编码的时候要看该网站使用的是哪种编码格式,Java 网站多半用的是 GBK 编码
// byte[] buffer = System.Text.Encoding.GetEncoding("gb2312").GetBytes(postdata);
发送post 请求 ,代码是我封装好了的,你可以自己写,一定要设置 referer
// response = CurrentRequestBase.SendRequest("POST", url, referer, buf, null);
// page = CurrentRequestBase.GetPageByResponse(response, Encoding.GetEncoding("gb2312"));
// ReadlyToPayHTML = page;
// PayCompleteURL = url;
// response.Close();
// Sends a form-encoded POST to `url` and stores the response text in `htm`.
// `url`, `page`, `referer`, `cc` and `htm` must be declared by the
// surrounding code; HtmlRegexHelper is a helper that is not shown here.
HttpWebRequest web = (HttpWebRequest)WebRequest.Create(url);

// Field names handed to the helper below.
// NOTE(review): "MerPriv" appears twice - confirm whether one of the two
// entries was meant to be a different field.
string[] parameter = new string[]
{
    "Version", "MerId", "MerDate", "OrdId",
    "TransAmt" ,"TransType","GateId","BgRetUrl",
    "PageRetUrl","MerPriv","MerPriv","ChkValue"
};

// Presumably builds the encoded POST body for the listed fields out of
// `page` - verify against HtmlRegexHelper.ConstPostDataEncoding.
string postdate;
HtmlRegexHelper.ConstPostDataEncoding(out postdate, page, parameter);
byte[] buffer = System.Text.Encoding.GetEncoding("gb2312").GetBytes(postdate);

web.Method = "POST";
web.Accept = "image/gif, */*";
web.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)";
web.ContentType = "application/x-www-form-urlencoded";
web.Referer = referer;
web.KeepAlive = true;
web.CookieContainer = cc;
web.ContentLength = buffer.Length;

// "using" releases the request stream, the response and the reader even
// when writing or reading throws.
using (Stream stream = web.GetRequestStream())
{
    stream.Write(buffer, 0, buffer.Length);
}

using (HttpWebResponse response = (HttpWebResponse)web.GetResponse())
using (StreamReader sr = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
{
    htm = sr.ReadToEnd();
}
亲,解决没?我也遇到这个问题了!