首页 新闻 会员 周边 捐助

我要爬取某网站的源码,但是它禁止了我的IP访问,我现在要用代理服务器来访问它的网站抓取源码!

0
悬赏园豆:160 [已关闭问题]

我要爬取某网站的源码,但是它禁止了我的IP访问,我现在要用代理服务器来访问它的网站抓取源码!

那网站的这个页面使用了POST提交数据:我的代码如下:高手们帮帮小弟啊,在线等【急急急】不搞定,明天上班挨批的啊!小弟感激不尽啊!

 

 public static string GetHtmlFromPost1(string urlString, Encoding encoding, string postDataString)
        {
            //定义局部变量
            System.Net.ServicePointManager.Expect100Continue = false;
            CookieContainer cookieContainer = new CookieContainer();
            HttpWebRequest httpWebRequest = null;
            HttpWebResponse httpWebResponse = null;
            Stream inputStream = null;
            Stream outputStream = null;
            StreamReader streamReader = null;
            string htmlString = string.Empty;

            //转换POST数据
            byte[] postDataByte = encoding.GetBytes(postDataString);
            //建立页面请求
            try
            {
                httpWebRequest = WebRequest.Create(urlString) as HttpWebRequest;
            }
            //处理异常
            catch (Exception ex)
            {
                throw new Exception("建立页面请求时发生错误!", ex);
            }
            //指定请求处理方式
            httpWebRequest.Method = "POST";

            httpWebRequest.Credentials = CredentialCache.DefaultCredentials;
           // httpWebRequest.Credentials = new NetworkCredential("lalalala", "123321a");
           // WebProxy proxy = new WebProxy();

    // WebProxy proxy = new WebProxy("59.36.98.154", 80);                                      //定義一個網關對象
             //                hwr.UseDefaultCredentials = true;                                      //啟用網關認証
             //   hwr.Proxy = proxy; 
            //proxy.Credentials = CredentialCache.DefaultCredentials;
            httpWebRequest.UseDefaultCredentials = true;
            httpWebRequest.Proxy = new WebProxy();
            httpWebRequest.Proxy = WebProxy.GetDefaultProxy();
            httpWebRequest.KeepAlive = false;
            httpWebRequest.ContentType = "application/x-www-form-urlencoded";
            httpWebRequest.Referer = urlString;
            httpWebRequest.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
            httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
            httpWebRequest.CookieContainer = cookieContainer;
            httpWebRequest.ContentLength = postDataByte.Length;

            #region
            // 需要使用Proxy和其配置
            //if (this.strProxyAddress.Length > 0)
            //{
            //    if (this.strProxyAddress == "DEFAULTPROXY")
            //    {
            //        httpWebRequest.Proxy = new WebProxy();
            //        httpWebRequest.Proxy = WebProxy.GetDefaultProxy();
            //    }
            //    else
            //    {
            //        WebProxy loProxy = new WebProxy("sproxy:8080", true);
            //        if (this.strProxyBypass.Length > 0)
            //        {
            //            loProxy.BypassList = this.strProxyBypass.Split(';');
            //        }

            //        if (this.strProxyUser.Length > 0)
            //        {
            //            loProxy.Credentials = new NetworkCredential(this.strProxyUser, this.strProxyPwd);
            //        }
            //        else
            //        {
            //            loProxy.Credentials = CredentialCache.DefaultCredentials;
            //        }

            //        httpWebRequest.Proxy = loProxy;
            //    }
            //}
            #endregion

            //向服务器传送数据
            try
            {
                inputStream = httpWebRequest.GetRequestStream();
                inputStream.Write(postDataByte, 0, postDataByte.Length);
            }
            //处理异常
            catch (Exception ex)
            {
                throw new Exception("发送POST数据时发生错误!", ex);
            }
            finally
            {
                inputStream.Close();
            }
            //接受服务器返回信息
            try
            {
                httpWebResponse = httpWebRequest.GetResponse() as HttpWebResponse;
                outputStream = httpWebResponse.GetResponseStream();
                streamReader = new StreamReader(outputStream, encoding);
                htmlString = streamReader.ReadToEnd();
            }
            //处理异常
            catch (Exception ex)
            {
                throw new Exception("接受服务器返回页面时发生错误!", ex);
            }
            finally
            {
                streamReader.Close();
            }
            foreach (Cookie cookie in httpWebResponse.Cookies)
            {
                cookieContainer.Add(cookie);
            }
            return htmlString;
        }

宅人的主页 宅人 | 初学一级 | 园豆:40
提问于:2009-09-13 20:39
< >
分享
其他回答(3)
0

可以试着拨号 

http://www.codeproject.com/KB/aspnet/webdialup.aspx

邀月 | 园豆:25475 (高人七级) | 2009-09-13 22:00
0
// The proxy address must be "host:port"; joining ip and port without the ':' separator
// (e.g. "59.36.98.154" + 80 -> "59.36.98.15480") produces an invalid address.
httpWebRequest.Proxy = new WebProxy(ip + ":" + port);

这样不就行了么?你想问什么啊?

PlayerYK | 园豆:95 (初学一级) | 2009-09-14 13:23
0

        postdate 是请求地址需要的参数,编码的时候要看该网站使用的是哪种编码格式,Java 多半用的是 GBK 编码

//    byte[] buffer = System.Text.Encoding.GetEncoding("gb2312").GetBytes(postdata);

发送 POST 请求,代码是我封装好了的,你可以自己写,一定要设置 Referer
            //    response = CurrentRequestBase.SendRequest("POST", url, referer, buf, null);
            //    page = CurrentRequestBase.GetPageByResponse(response, Encoding.GetEncoding("gb2312"));
            //    ReadlyToPayHTML = page;
            //    PayCompleteURL = url;
            //    response.Close();

 

 

 HttpWebRequest web = (HttpWebRequest)HttpWebRequest.Create(url);
                string postdate = null;
                string[] parameter = new string[]
            {
                "Version", "MerId", "MerDate", "OrdId",
                "TransAmt" ,"TransType","GateId","BgRetUrl",
                "PageRetUrl","MerPriv","MerPriv","ChkValue"
            };

                //ConstPostDataEncoding
                HtmlRegexHelper.ConstPostDataEncoding(out postdate, page, parameter);

 

                byte[] buffer = System.Text.Encoding.GetEncoding("gb2312").GetBytes(postdate);

                web.Referer = referer;
                web.Method = "POST";
                web.Accept = "image/gif, */*";
                web.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)";
                web.ContentType = "application/x-www-form-urlencoded";
                web.Referer = referer;
                web.KeepAlive = true;
                web.CookieContainer = cc;
                web.ContentLength = buffer.Length;
                Stream stream = web.GetRequestStream();
                stream.Write(buffer, 0, buffer.Length);
                stream.Close();
                HttpWebResponse response = web.GetResponse() as HttpWebResponse;
                StreamReader sr = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312"));
                htm = sr.ReadToEnd();
                sr.Close();
                response.Close();

 

 

 

jackyong | 园豆:149 (初学一级) | 2009-09-15 07:58
0

亲,解决没?我也遇到这个问题了!

~学无止境~ | 园豆:202 (菜鸟二级) | 2014-08-13 19:14
清除回答草稿
   您需要登录以后才能回答,未注册用户请先注册