/// <summary>
/// Downloads the source of a web page using <see cref="ExtendedWebClient"/>.
/// </summary>
/// <remarks>
/// The page is first fetched with <c>DownloadString</c> while the client's encoding is set to
/// UTF-8. If the result looks garbled (as decided by <c>IsLuan</c>), the charset declared in
/// the page's <c>&lt;meta&gt;</c> tag is looked up and the page is downloaded and decoded again
/// with that encoding.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <param name="timout">Timeout in seconds.</param>
/// <param name="proxy">Proxy address; when null or empty the request's proxy setting is left untouched.</param>
/// <param name="httpCode">
/// Set to 200 on success and to 404 on ANY failure. It is a success/failure marker,
/// not the real HTTP status returned by the server.
/// </param>
/// <returns>The page source on success; the exception message on failure.</returns>
public static string GetHtmlSource(string url, int timout, string proxy, out int httpCode)
{
    try
    {
        using (var client = new ExtendedWebClient())
        {
            client.Timeout = timout * 1000; // seconds -> milliseconds
            client.Proxy = proxy;
            client.Encoding = Encoding.UTF8;
            string str = client.DownloadString(url);

            // Check whether the decoded text is garbled.
            if (IsLuan(str))
            {
                // Capture only the charset token itself. The previous pattern captured greedily up to
                // the last quote of the tag, so any attribute following the charset ended up in the
                // encoding name and made Encoding.GetEncoding throw.
                Regex reg = new Regex(@"<meta[^>]*?charset\s*=\s*[""']?\s*([\w.:-]+)", RegexOptions.IgnoreCase);
                Match m = reg.Match(str);
                string charset = m.Groups[1].Value;
                if (!string.IsNullOrEmpty(charset))
                {
                    Encoding pageEncoding = null;
                    try
                    {
                        pageEncoding = Encoding.GetEncoding(charset);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown or unsupported charset name: keep the UTF-8 text
                        // instead of failing the whole download.
                    }

                    if (pageEncoding != null)
                    {
                        var data = client.DownloadData(url);
                        using (var reader = new StreamReader(new MemoryStream(data), pageEncoding))
                        {
                            str = reader.ReadToEnd();
                        }
                    }
                }
            }

            httpCode = 200;
            return str;
        }
    }
    catch (Exception ex)
    {
        // Every failure (network error, timeout, bad URL, ...) is reported as 404.
        httpCode = 404;
        return ex.Message;
    }
}


/// <summary>
/// A <see cref="WebClient"/> whose HTTP requests use a configurable timeout, an optional
/// proxy, HTTP/1.0 without keep-alive, and a fixed browser-like User-Agent.
/// </summary>
public class ExtendedWebClient : WebClient
{
    /// <summary>
    /// Raises the process-wide connection limit once, instead of reassigning the
    /// global setting on every single request.
    /// </summary>
    static ExtendedWebClient()
    {
        ServicePointManager.DefaultConnectionLimit = 50;
    }

    public ExtendedWebClient()
    {
    }

    /// <summary>
    /// Request timeout in milliseconds. Values of 0 or less leave the request's
    /// default timeouts in place.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>Proxy address; null or empty leaves the request's proxy setting untouched.</summary>
    public string Proxy { get; set; }

    /// <summary>
    /// Creates the request for <paramref name="address"/> and applies this client's settings.
    /// </summary>
    /// <param name="address">The resource to request.</param>
    /// <returns>The configured <see cref="HttpWebRequest"/>.</returns>
    /// <exception cref="InvalidCastException">
    /// The base class produced a request that is not an <see cref="HttpWebRequest"/>.
    /// </exception>
    protected override WebRequest GetWebRequest(Uri address)
    {
        HttpWebRequest request = (HttpWebRequest)base.GetWebRequest(address);
        request.KeepAlive = false;
        request.ProtocolVersion = HttpVersion.Version10;
        request.Method = "GET";
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
        request.ServicePoint.Expect100Continue = false;

        if (Timeout > 0)
        {
            request.Timeout = Timeout;
            // Timeout is already in milliseconds; multiplying it by 1000 again made the
            // read/write timeout a thousand times too long and could overflow int.
            request.ReadWriteTimeout = Timeout;
        }

        if (!string.IsNullOrEmpty(Proxy))
        {
            request.Proxy = new WebProxy(Proxy);
        }

        return request;
    }
}
/// <summary>
/// Downloads the image at <paramref name="imgUrl"/> and saves it to the path returned by
/// <c>FileHelper.CheckPath</c> for the temporary folder of the current page title.
/// </summary>
/// <param name="imgUrl">Address of the image to download.</param>
/// <param name="url">
/// Not used by this method. NOTE(review): presumably the page the image was found on; confirm with callers.
/// </param>
/// <returns>
/// 1 when the image was saved; 0 when the response is not an image, or when a
/// <see cref="WebException"/> or <see cref="UriFormatException"/> occurs.
/// </returns>
private int DownLoadImage(string imgUrl, string url)
{
    var path = FileHelper.CheckPath(imgUrl, _tmpPath + "\\" + WebTitle);
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imgUrl);
        request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
        request.Timeout = int.Parse(nudTimeOut.Text) * 1000; // seconds -> milliseconds
        if (!string.IsNullOrEmpty(txtProxy.Text))
        {
            request.Proxy = new WebProxy(txtProxy.Text);
        }

        // Dispose the response so the underlying connection is released even when
        // the content is not an image or saving fails.
        using (WebResponse response = request.GetResponse())
        {
            // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive and can
            // mangle "IMAGE/" under some cultures (e.g. Turkish dotless i).
            string contentType = response.ContentType;
            if (contentType == null || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
            {
                return 0;
            }

            // The image is disposed before the stream it was created from.
            using (Stream stream = response.GetResponseStream())
            using (Image imgPhoto = Image.FromStream(stream))
            {
                // (additional checks were elided by the author at this point)
                imgPhoto.Save(path);
                return 1;
            }
        }
    }
    catch (WebException)
    {
        return 0;
    }
    catch (UriFormatException)
    {
        return 0;
    }
}
我做了一个根据 URL 获取所有图片的程序:用 WebClient 获取网页源码,用 WebRequest 下载图片。所有功能我已经实现,但是自己不满意,想求经验……
特求做过这方面的大侠指点。 我是不是该用socket获取和下载,socket真的优于webrequest和webclient?有没有好点的样例推荐? 比如c#开源的这类项目,或者给点思路也行啊。
换成 HttpClient 就好了,.NET 4.5 以后支持,各种异步什么的。
你确定不是因为你自己的网速太烂????
越底层效率越高,也越难写
每一层都是对下层的包装,拆包、封包必然浪费资源
你可以用异步的 HTTP 请求(HttpWebRequest),这样下载图片会更快一点。