首页 新闻 搜索 专区 学院

C# 用 WebClient 获取网页源码、下载图片,求优化、求经验

0
悬赏园豆:40 [已解决问题] 解决于 2015-02-14 10:56
 1      /// <summary>
 2         /// WebClient取网页源码
 3         /// </summary>
 4         /// <param name="url"></param>
 5         /// <param name="timout">单位(秒)</param>
 6         /// <param name="proxy"></param>
 7         /// <param name="httpCode">http状态码</param>
 8         /// <returns></returns>
 9         public static string GetHtmlSource(string url, int timout, string proxy, out int httpCode)
10         {
11             try
12             {14                 using (var client = new ExtendedWebClient())
15                 {
16                     client.Timeout = timout * 1000;
17                     client.Proxy = proxy;
18                     client.Encoding = Encoding.UTF8;
19                     string str = client.DownloadString(url);
20 
21                     //验证字符是否有乱码
22                     if (IsLuan(str))
23                     {
24                         Regex reg = new Regex("<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase);
25                         Match m = reg.Match(str);
26                         string charset = m.Groups[2].Value;
27                         if (!string.IsNullOrEmpty(charset))
28                         {
29                             string encode = charset.Replace("\"", "");
30                             var data = client.DownloadData(url);
31                             str = new StreamReader(new MemoryStream(data), Encoding.GetEncoding(encode)).ReadToEnd();
32                         }
33                     }
34                     httpCode = 200;
35                     return str;                  
36                 }
37             }
38             catch (Exception ex)
39             {
40                 httpCode = 404;
41                 return ex.Message;
42             }
43         }
44 
45 
46         public class ExtendedWebClient : WebClient
47         {
48             public int Timeout { get; set; }
49             public string Proxy { get; set; }
50 
51             protected override WebRequest GetWebRequest(Uri address)
52             {
53                 HttpWebRequest request = (HttpWebRequest)base.GetWebRequest(address);
54                 //  if (request != null)
55                 request.KeepAlive = false;
56                 request.ProtocolVersion = HttpVersion.Version10;
57                 request.Method = "GET";
58                 request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
59                 request.ServicePoint.Expect100Continue = false;
60                 request.Timeout = Timeout;
61                 request.ReadWriteTimeout = 1000 * Timeout;
62 
63                 ServicePointManager.DefaultConnectionLimit = 50;
64                 if (!string.IsNullOrEmpty(Proxy))
65                     request.Proxy = new WebProxy(Proxy);
66 
67                 return request;
68             }
69 
70             public ExtendedWebClient()
71             {
72 
73 
74             }
75  
76         }

// Image download method. The snippet is truncated as posted: some checks are elided and the
// method's closing lines are not shown.
// Requests imgUrl with a fixed User-Agent, a timeout of nudTimeOut.Text seconds and, when
// txtProxy.Text is non-empty, that proxy. If the response Content-Type starts with "image/",
// the stream is loaded as an Image, saved to the path returned by FileHelper.CheckPath, and 1
// is returned. WebException and UriFormatException are caught and yield 0.
// NOTE(review): response and stream are not disposed in the visible code, and the url
// parameter is not used in the visible code — confirm against the full source.
private int DownLoadImage(string imgUrl, string url) { var path = FileHelper.CheckPath(imgUrl, _tmpPath + "\\" + WebTitle); try { HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imgUrl); request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)"; request.Timeout = int.Parse(nudTimeOut.Text) * 1000; if (!string.IsNullOrEmpty(txtProxy.Text)) request.Proxy = new WebProxy(txtProxy.Text); WebResponse response = request.GetResponse(); Stream stream = response.GetResponseStream(); if (response.ContentType.ToLower().StartsWith("image/")) { using (Image imgPhoto = Image.FromStream(stream)) {            //some checks             imgPhoto.Save(path);             return 1;             }

          }
catch (WebException)        {        return 0;        }        catch (UriFormatException)        {        return 0;        }

我做了一个根据 URL 获取页面中所有图片的程序:用 WebClient 获取网页源码,用 WebRequest 下载图片。所有功能我都已经实现,但是自己并不满意,想求些经验……

特求做过这方面的大侠指点。我是不是该用 Socket 来获取和下载?Socket 真的优于 WebRequest 和 WebClient 吗?有没有好一点的样例推荐?比如 C# 开源的这类项目,或者给点思路也行啊。

  

奋奋奋的主页 奋奋奋 | 初学一级 | 园豆:32
提问于:2015-02-12 13:55
< >
分享
最佳答案
0

不满意指的是哪方面不满意呢?是效率吗?

其实网页里的图片,基本上也没多大,直接这样下载就可以.

如果你感觉想再提升一下的话,建议你去看一下@sufei 同学的httpHelper.cs

收获园豆:30
只会造轮子 | 老鸟四级 |园豆:2274 | 2015-02-12 15:09
其他回答(4)
0

换成 HttpClient 就好了,.NET 4.5 以后支持,各种异步操作什么的都有。

收获园豆:6
arg | 园豆:1047 (小虾三级) | 2015-02-12 17:02
0

你确定不是因为你自己的网速太烂????

收获园豆:4
XiaoFaye | 园豆:3082 (老鸟四级) | 2015-02-12 17:48
0

越底层效率越高,也越难写

每一层都是对下层的包装,拆包、封包必然浪费资源

飞鱼576 | 园豆:202 (菜鸟二级) | 2015-02-15 09:17
0

你可以使用异步的 HTTP 请求,这样下载图片会更快一点。

netqiang | 园豆:405 (菜鸟二级) | 2015-02-26 13:21
清除回答草稿
   您需要登录以后才能回答,未注册用户请先注册