using HtmlAgilityPack; using System; using System.Collections.Generic; using System.Drawing; using System.IO; using System.Linq; using System.Net; using System.Reflection; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; using Nipusa.Ext; using System.Diagnostics; namespace Nipusa.Net { /// <summary> /// 主要的目的是为了cookie的自动处理,编码自动处理 author:http://www.nipusa.net/ /// </summary> public class Http { /// <summary> /// cookie修正,主要用于回发所有cookie的临时保存 /// </summary> List<Cookie> cookies_revise = new List<Cookie>(); public Http() { ConnTimeout = 1000*10; RWTimeout = 1000*10; AutoRedirect = true; Proxy = null; Cookies = new CookieContainer(); Headers = new Dictionary<string, string>(); results = null; } Uri reqUri; /* /// <summary> /// key=网址,value=网址对应编码 用于post编码识别{此想法未实现} /// </summary> //Dictionary<string, string> encoding = new Dictionary<string, string>();*/ public Uri ReqUri { get { return reqUri; } } public int ConnTimeout { get; set; } public int RWTimeout { get; set; } public bool AutoRedirect { get; set; } /// <summary> /// 被封的网站或须要登陆才能访问的网页可以通过此设置 /// </summary> public IProxy Proxy { get; set; } public CookieContainer Cookies { get; set; } public Dictionary<string, string> Headers { get; set; } public ResponseInfo Response { get; set; } byte[] results; /// <summary> /// 上一次的请求结果 /// </summary> public byte[] Results { get { return results; } } HttpWebRequest Request(Uri uri) { Trace.WriteLine("Request(Uri uri)"); this.reqUri = uri; var req = WebRequest.CreateHttp(uri);// as HttpWebRequest; req.Proxy = null; req.AllowAutoRedirect = AutoRedirect; req.Timeout = ConnTimeout; req.ReadWriteTimeout = RWTimeout; req.Accept = "text/html, application/xhtml+xml, */*"; req.UserAgent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)"; req.Headers["Accept-Encoding"] = "gzip, deflate"; req.AutomaticDecompression = DecompressionMethods.GZip; foreach (var h in Headers) { switch (h.Key) { case "User-Agent": req.UserAgent = h.Value; break; case "METHOD": req.Method = h.Value; break; case "Referer": req.Referer = h.Value; break; case "Content-Type": req.ContentType = h.Value; break; default: req.Headers[h.Key] = h.Value; break; } } //cookies req.CookieContainer = Cookies; cookies_revise.ForEach(c => { req.CookieContainer.Add(uri, new Cookie(c.Name, c.Value)); }); return req; } public async Task<byte[]> Get(Uri uri) { Trace.WriteLine("Get(Uri uri)"); Headers["METHOD"] = "GET"; byte[] bts = await DoRequest(uri); return bts; } public async Task<byte[]> Post(Uri uri, Dictionary<string, string> form, string encoding = "utf-8") { Headers["METHOD"] = "POST"; Headers["Content-Type"] = "application/x-www-form-urlencoded"; byte[] bts = await DoRequest(uri, form, encoding); return bts; } public async Task<byte[]> DoRequest(Uri uri, Dictionary<string, string> form = null, string form_encoding = null) { Trace.WriteLine("DoRequest(Uri uri, Dictionary<string, string> form = null, string form_encoding = null)"); var req = Request(uri); if ("POST" == req.Method) { if (null == form || 1 > form.Count) { throw new Exception("表单没有值"); } List<string> formlst = new List<string>(); foreach (var kv in form) { if (string.Empty == kv.Key) formlst.Add(kv.Value); else formlst.Add(kv.Key + "=" + kv.Value); } using (var stream = await req.GetRequestStreamAsync()) { var buffer = Encoding.GetEncoding(form_encoding).GetBytes(string.Join("&", formlst)); Trace.WriteLine("stream.WriteAsync(buffer, 0, buffer.Length)"); await stream.WriteAsync(buffer, 0, buffer.Length);//.Timeout(TimeSpan.FromMilliseconds(RWTimeout),"提交数据超时"); } } Trace.WriteLine("req.GetResponseAsync()"); using (var res = await req.GetResponseAsync() as HttpWebResponse) { //拷贝对属性 this.Response = new ResponseInfo() { CharacterSet = res.CharacterSet, ContentEncoding = res.ContentEncoding, ContentLength = res.ContentLength, ContentType = res.ContentType, Cookies = res.Cookies, Headers = res.Headers, IsFromCache = res.IsFromCache, IsMutuallyAuthenticated = res.IsMutuallyAuthenticated, LastModified = res.LastModified, Method = res.Method, ProtocolVersion = res.ProtocolVersion, ResponseUri = res.ResponseUri, Server = res.Server, StatusCode = res.StatusCode, StatusDescription = res.StatusDescription, SupportsHeaders = res.SupportsHeaders }; //用于修正cookie CookieCollection ccl = res.Cookies; cookies_revise.Clear(); foreach (Cookie ck in ccl) { cookies_revise.Add(ck); } using (var ms = new MemoryStream()) { using (var stream = res.GetResponseStream()) { //await stream.CopyToAsync(ms);//.Timeout(TimeSpan.FromMilliseconds(RWTimeout),"获取数据超时"); byte[] buffer = new byte[1024]; int count = 0; Trace.WriteLine("stream.ReadAsync(buffer, 0, buffer.Length))"); while (0<(count=await stream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false))) { //int count = await stream.ReadAsync(buffer, 0, buffer.Length); /*if (count == 0) { return; }*/ Trace.WriteLine("ms.WriteAsync(buffer, 0, count);" + count.ToString()); await ms.WriteAsync(buffer, 0, count); } Trace.WriteLine("toarray"); this.results = ms.ToArray(); } } } return this.results; } /// <summary> /// null表示自动识别编码 /// </summary> /// <param name="ec"></param> /// <returns></returns> public string Str(string encoding = null) { if (null == this.results) return null; if (null != encoding) { return Encoding.GetEncoding(encoding).GetString(this.results); } else { if ("ISO-8859-1" == Response.CharacterSet) Response.CharacterSet = ""; if (string.Empty != Response.CharacterSet) { return Encoding.GetEncoding(Response.CharacterSet).GetString(this.results); } else { Encoding utf8 = Encoding.GetEncoding("utf-8"); string temp = utf8.GetString(this.results); Match macth = Regex.Match(temp, @"<meta.*?charset=[^\w]?(?<cs>[-\w]+).*?>", RegexOptions.IgnoreCase); if (macth.Success) { if (utf8.BodyName == macth.Groups["cs"].Value.Trim().ToLower()) return temp; try { return Encoding.GetEncoding(macth.Groups["cs"].Value.Trim()).GetString(this.results); } catch (Exception ex) { } } PROM_LETTER: //查找到常用字就直接返回。找不到用gbk编码一次直接返回 if ((new Regex("(的|得|一|二|三|四|五|六|七|八|九|首|页|版|权|联|系|了|是|不|我|在|人|们|有|地|和|大|着|主|中|上|为|们|个|用|工|时|要|动|国|产|以|到|会|作|他|她|它|来|分|生|对|于|学|下|级|就|义|年|这|于|对|而|也)").IsMatch(temp))) { return temp; } return Encoding.GetEncoding("gbk").GetString(this.results); } } } public Image Img() { if (null == this.results) return null; Image img = null; MemoryStream ms = new MemoryStream(this.results); img = Image.FromStream(ms); return img; } public List<Uri> Links() { List<Uri> lst = new List<Uri>(); if (null != this.results) { HtmlDocument html_doc = new HtmlDocument(); html_doc.LoadHtml(Str()); HtmlNodeCollection nodes = html_doc.DocumentNode.SelectNodes("//a[@href]"); if (null != nodes) { string cdomain = Response.ResponseUri.Domain(); foreach (HtmlNode node in nodes) { Uri uri = null; string url = node.Attributes["href"].Value; if (-1 < url.IndexOf("#")) continue; if (url.StartsWith("javascript:")) continue; if (-1 < url.IndexOf("://")) Uri.TryCreate(url, UriKind.Absolute, out uri); else Uri.TryCreate(Response.ResponseUri, url, out uri); if (null != uri) { if (!lst.Contains(uri) && cdomain == uri.Domain()) lst.Add(uri); } } } } return lst; } } }
上边是代码片段。在button的click(async void)事件中。while(true){await ...}
发现在网络慢的时候。ReadAsync永不返回?
后来按http://neue.cc/2012/10/16_383.html
加入了超时。但是让界面卡死了。
界面调用代码。
async void button1_Click(object sender, EventArgs e)
{
Uri current_uri = await dqueue[file][0].De();
if (null != current_uri)// turi.Result)
{
try
{
//Task<byte[]> thttp =
await http.Get(current_uri);
//thttp.ConfigureAwait(false);
//thttp.Wait();
}
catch .......
}
根据trace的输出。程序死在了ReadAsync上。输出了 Trace.WriteLine("stream.ReadAsync(buffer, 0, buffer.Length))");再也不返回了。
现在把http类里的代码变成同步。程序运行一天。无任何问题。
下边是调用代码
try { //Task<byte[]> thttp = //await http.Get(current_uri); await Task.Run(() => http.Get(current_uri)); //thttp.ConfigureAwait(false); //thttp.Wait(); }
需要用委托吧
线程超时需要处理,重新发送请求,进行轮询
发现在网络慢的时候。ReadAsync永不返回?
后来按http://neue.cc/2012/10/16_383.html
加入了超时。但是让界面卡死了。
加了超时的。
@gnhao: 是不是内存锁定了?
看代码没看出头绪来,只能根据经验提供参考
@二十三号同学: 有没示例或资料?你是指死锁?我怀疑是死锁。但我的异步代码没新开任何task或thread.它完全工作在ui线程。按理应该卡才对。但它不卡。这个情况在网络慢的时候容易发生。基本上和:http://msdn.microsoft.com/zh-cn/library/vstudio/hh300224.aspx?cs-save-lang=1&cs-lang=csharp#code-snippet-2 这里的代码没两样。现在代码换成同步的了。没有任何问题。程序运行一天了。调用代码只是这样变了一下。 await Task.Run(() => http.Get(current_uri));页非异步的await http.get。其实上边封装的很简单的。改成同步只是去掉task async await并调用同步方法。我看.net内部也是调用了begin end对去支持实现 async方法。现在真心搞不懂了。
代码太长,没法全看。
await http.Get(current_uri);
这样的异步调用没有任何意义。异步调用的基本模式是:
调用异步方法
执行不需要异步方法结果的代码
await先前调用异步方的法返回值等待异步方法的执行结果
核心的在
using (var ms = new MemoryStream()) { using (var stream = res.GetResponseStream()) { //await stream.CopyToAsync(ms);//.Timeout(TimeSpan.FromMilliseconds(RWTimeout),"获取数据超时"); byte[] buffer = new byte[1024]; int count = 0; while (0 < (count = await stream.ReadAsync(buffer, 0, buffer.Length))) { //int count = await stream.ReadAsync(buffer, 0, buffer.Length); /*if (count == 0) { return; }*/ await ms.WriteAsync(buffer, 0, count); } this.results = ms.ToArray(); } }
我Trace.writeline(代码现在删除了)。output中显示在ReadAsync后没有下文。我在button的click异步事件中调用的。只是不想开新线程那些。开发起来方便。我这里想知道的是为什么死在了readasync上边。。死在上边的时候界面并未卡死。像光进了黑洞。。。。反有任何回应。
不太明白你所说的没有意义是什么意思?程序构架或者说设计上的?还是说错误原因是来自于await http.get?
@gnhao: 噢,这个我理解错误了