首页新闻找找看学习计划

网络异步。async await 死在了ReadAsync读取数据上?请问是有何解决方案?

0
悬赏园豆:100 [已解决问题] 解决于 2013-07-16 12:46
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Nipusa.Ext;
using System.Diagnostics;

namespace Nipusa.Net
{
    /// <summary>
    /// 主要的目的是为了cookie的自动处理,编码自动处理 author:http://www.nipusa.net/
    /// </summary>
    public class Http
    {
        /// <summary>
        /// cookie修正,主要用于回发所有cookie的临时保存
        /// </summary>
        List<Cookie> cookies_revise = new List<Cookie>();

        public Http()
        {
            ConnTimeout = 1000*10;
            RWTimeout = 1000*10;
            AutoRedirect = true;
            Proxy = null;
            Cookies = new CookieContainer();
            Headers = new Dictionary<string, string>();
            results = null;

        }
 
        Uri reqUri;
        /*
        /// <summary>
        /// key=网址,value=网址对应编码 用于post编码识别{此想法未实现}
        /// </summary>
        //Dictionary<string, string> encoding = new Dictionary<string, string>();*/
        public Uri ReqUri { get { return reqUri; } }
        public int ConnTimeout { get; set; }
        public int RWTimeout { get; set; }
        public bool AutoRedirect { get; set; }
        /// <summary>
        /// 被封的网站或须要登陆才能访问的网页可以通过此设置
        /// </summary>
        public IProxy Proxy { get; set; }
        public CookieContainer Cookies { get; set; }
        public Dictionary<string, string> Headers { get; set; }
        public ResponseInfo Response { get; set; }
        byte[] results;
        /// <summary>
        /// 上一次的请求结果
        /// </summary>
        public byte[] Results { get { return results; } }

        HttpWebRequest Request(Uri uri)
        {
            Trace.WriteLine("Request(Uri uri)");

            this.reqUri = uri;

            var req = WebRequest.CreateHttp(uri);// as HttpWebRequest;
            req.Proxy = null;
            req.AllowAutoRedirect = AutoRedirect;
            req.Timeout = ConnTimeout;
            req.ReadWriteTimeout = RWTimeout;
            req.Accept = "text/html, application/xhtml+xml, */*";
            req.UserAgent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)";
            req.Headers["Accept-Encoding"] = "gzip, deflate";
            req.AutomaticDecompression = DecompressionMethods.GZip;

            foreach (var h in Headers)
            {
                switch (h.Key)
                {
                    case "User-Agent":
                        req.UserAgent = h.Value;
                        break;
                    case "METHOD":
                        req.Method = h.Value;
                        break;
                    case "Referer":
                        req.Referer = h.Value;
                        break;
                    case "Content-Type":
                        req.ContentType = h.Value;
                        break;
                    default:
                        req.Headers[h.Key] = h.Value;
                        break;
                }
            }

            //cookies
            req.CookieContainer = Cookies;
            cookies_revise.ForEach(c => { req.CookieContainer.Add(uri, new Cookie(c.Name, c.Value)); });
            return req;
        }

        public async Task<byte[]> Get(Uri uri)
        {
            Trace.WriteLine("Get(Uri uri)");
            Headers["METHOD"] = "GET";
            byte[] bts = await DoRequest(uri);
            return bts;
        }

        public async Task<byte[]> Post(Uri uri, Dictionary<string, string> form, string encoding = "utf-8")
        {
            Headers["METHOD"] = "POST";
            Headers["Content-Type"] = "application/x-www-form-urlencoded";
            byte[] bts = await DoRequest(uri, form, encoding);
            return bts;
        }

        public async Task<byte[]> DoRequest(Uri uri, Dictionary<string, string> form = null, string form_encoding = null)
        {
            Trace.WriteLine("DoRequest(Uri uri, Dictionary<string, string> form = null, string form_encoding = null)");
            var req = Request(uri);
            if ("POST" == req.Method)
            {
                if (null == form || 1 > form.Count)
                {
                    throw new Exception("表单没有值");
                }

                List<string> formlst = new List<string>();
                foreach (var kv in form)
                {
                    if (string.Empty == kv.Key)
                        formlst.Add(kv.Value);
                    else
                        formlst.Add(kv.Key + "=" + kv.Value);
                }

                using (var stream = await req.GetRequestStreamAsync())
                {
                    var buffer = Encoding.GetEncoding(form_encoding).GetBytes(string.Join("&", formlst));
                    Trace.WriteLine("stream.WriteAsync(buffer, 0, buffer.Length)");
                    await stream.WriteAsync(buffer, 0, buffer.Length);//.Timeout(TimeSpan.FromMilliseconds(RWTimeout),"提交数据超时");
                }

            }

            Trace.WriteLine("req.GetResponseAsync()");
            using (var res = await req.GetResponseAsync()  as HttpWebResponse)
            {
                //拷贝对属性
                this.Response = new ResponseInfo()
                {
                    CharacterSet = res.CharacterSet,
                    ContentEncoding = res.ContentEncoding,
                    ContentLength = res.ContentLength,
                    ContentType = res.ContentType,
                    Cookies = res.Cookies,
                    Headers = res.Headers,
                    IsFromCache = res.IsFromCache,
                    IsMutuallyAuthenticated = res.IsMutuallyAuthenticated,
                    LastModified = res.LastModified,
                    Method = res.Method,
                    ProtocolVersion = res.ProtocolVersion,
                    ResponseUri = res.ResponseUri,
                    Server = res.Server,
                    StatusCode = res.StatusCode,
                    StatusDescription = res.StatusDescription,
                    SupportsHeaders = res.SupportsHeaders
                };
                //用于修正cookie
                CookieCollection ccl = res.Cookies;
                cookies_revise.Clear();
                foreach (Cookie ck in ccl) { cookies_revise.Add(ck); }

                using (var ms = new MemoryStream())
                {
                    using (var stream = res.GetResponseStream())
                    {
                       
                        //await stream.CopyToAsync(ms);//.Timeout(TimeSpan.FromMilliseconds(RWTimeout),"获取数据超时");
                        byte[] buffer = new byte[1024];
                        int count = 0;
                        Trace.WriteLine("stream.ReadAsync(buffer, 0, buffer.Length))");
                        while (0<(count=await stream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false)))
                        {
                            //int count = await stream.ReadAsync(buffer, 0, buffer.Length);
                            /*if (count == 0)
                            {
                                return;
                            }*/
                            Trace.WriteLine("ms.WriteAsync(buffer, 0, count);" + count.ToString());
                            await ms.WriteAsync(buffer, 0, count);
                        }
                        Trace.WriteLine("toarray");
                        this.results = ms.ToArray();
                    }
                }
            }

            return this.results;
        }

        /// <summary>
        /// null表示自动识别编码
        /// </summary>
        /// <param name="ec"></param>
        /// <returns></returns>
        public string Str(string encoding = null)
        {
            if (null == this.results)
                return null;

            if (null != encoding)
            {
                return Encoding.GetEncoding(encoding).GetString(this.results);
            }
            else
            {
                if ("ISO-8859-1" == Response.CharacterSet)
                    Response.CharacterSet = "";

                if (string.Empty != Response.CharacterSet)
                {
                    return Encoding.GetEncoding(Response.CharacterSet).GetString(this.results);
                }
                else
                {
                    Encoding utf8 = Encoding.GetEncoding("utf-8");
                    string temp = utf8.GetString(this.results);
                    Match macth = Regex.Match(temp, @"<meta.*?charset=[^\w]?(?<cs>[-\w]+).*?>", RegexOptions.IgnoreCase);
                    if (macth.Success)
                    {
                        if (utf8.BodyName == macth.Groups["cs"].Value.Trim().ToLower())
                            return temp;
                        try
                        {
                            return Encoding.GetEncoding(macth.Groups["cs"].Value.Trim()).GetString(this.results);
                        }
                        catch (Exception ex)
                        {
                        }

                    }

                PROM_LETTER:
                    //查找到常用字就直接返回。找不到用gbk编码一次直接返回
                    if ((new Regex("(的|得|一|二|三|四|五|六|七|八|九|首|页|版|权|联|系|了|是|不|我|在|人|们|有|地|和|大|着|主|中|上|为|们|个|用|工|时|要|动|国|产|以|到|会|作|他|她|它|来|分|生|对|于|学|下|级|就|义|年|这|于|对|而|也)").IsMatch(temp)))
                    {
                        return temp;
                    }

                    return Encoding.GetEncoding("gbk").GetString(this.results);

                }
            }
        }

        public Image Img()
        {
            if (null == this.results)
                return null;

            Image img = null;
            MemoryStream ms = new MemoryStream(this.results);
            img = Image.FromStream(ms);
            return img;
        }

        public List<Uri> Links()
        {
            List<Uri> lst = new List<Uri>();
            if (null != this.results)
            {
                HtmlDocument html_doc = new HtmlDocument();
                html_doc.LoadHtml(Str());
                HtmlNodeCollection nodes = html_doc.DocumentNode.SelectNodes("//a[@href]");
                if (null != nodes)
                {
                    string cdomain = Response.ResponseUri.Domain();
                    foreach (HtmlNode node in nodes)
                    {
                        Uri uri = null;
                        string url = node.Attributes["href"].Value;
                        if (-1 < url.IndexOf("#"))
                            continue;
                        if (url.StartsWith("javascript:"))
                            continue;

                        if (-1 < url.IndexOf("://"))
                            Uri.TryCreate(url, UriKind.Absolute, out uri);
                        else
                            Uri.TryCreate(Response.ResponseUri, url, out uri);
                        if (null != uri)
                        {

                            if (!lst.Contains(uri) && cdomain == uri.Domain())
                                lst.Add(uri);
                        }

                    }
                }
            }
            return lst;
        }

    }
}

上边是代码片段。在button的click(async void)事件中。while(true){await ...}
发现在网络慢的时候。ReadAsync永不返回?
后来按http://neue.cc/2012/10/16_383.html
加入了超时。但是让界面卡死了。

界面调用代码。

 async void button1_Click(object sender, EventArgs e)
        {

                        Uri current_uri = await dqueue[file][0].De();
                        if (null != current_uri)// turi.Result)
                        {
                            try
                            {
                                //Task<byte[]> thttp =

                                await http.Get(current_uri);
                                //thttp.ConfigureAwait(false);
                                //thttp.Wait();
                            }
                            catch  .......
        }

根据trace的输出。程序死在了ReadAsync上。输出了 Trace.WriteLine("stream.ReadAsync(buffer, 0, buffer.Length))");再也不返回了。

问题补充:

现在把http类里的代码变成同步。程序运行一天。无任何问题。

下边是调用代码

     try
                            {
                                //Task<byte[]> thttp =

                                //await http.Get(current_uri);
                                await Task.Run(() => http.Get(current_uri));
                                //thttp.ConfigureAwait(false);
                                //thttp.Wait();
                            }
gnhao的主页 gnhao | 初学一级 | 园豆:9
提问于:2013-07-15 19:45
< >
分享
最佳答案
0

需要用委托吧

线程超时需要处理,重新发送请求,进行轮询

收获园豆:50
二十三号同学 | 小虾三级 |园豆:941 | 2013-07-16 11:29

发现在网络慢的时候。ReadAsync永不返回?
后来按http://neue.cc/2012/10/16_383.html
加入了超时。但是让界面卡死了。

加了超时的。

gnhao | 园豆:9 (初学一级) | 2013-07-16 11:31

@gnhao: 是不是内存锁定了?

看代码没看出头绪来,只能根据经验提供参考

二十三号同学 | 园豆:941 (小虾三级) | 2013-07-16 11:37

@二十三号同学: 有没示例或资料?你是指死锁?我怀疑是死锁。但我的异步代码没新开任何task或thread.它完全工作在ui线程。按理应该卡才对。但它不卡。这个情况在网络慢的时候容易发生。基本上和:http://msdn.microsoft.com/zh-cn/library/vstudio/hh300224.aspx?cs-save-lang=1&cs-lang=csharp#code-snippet-2 这里的代码没两样。现在代码换成同步的了。没有任何问题。程序运行一天了。调用代码只是这样变了一下。 await Task.Run(() => http.Get(current_uri));页非异步的await http.get。其实上边封装的很简单的。改成同步只是去掉task async await并调用同步方法。我看.net内部也是调用了begin end对去支持实现 async方法。现在真心搞不懂了。

gnhao | 园豆:9 (初学一级) | 2013-07-16 11:46
其他回答(1)
0

代码太长,没法全看。

await http.Get(current_uri);

这样的异步调用没有任何意义。异步调用的基本模式是:

调用异步方法

执行不需要异步方法结果的代码

await先前调用异步方的法返回值等待异步方法的执行结果

收获园豆:50
天边彩云 | 园豆:629 (小虾三级) | 2013-07-16 10:36

核心的在

        using (var ms = new MemoryStream())
                {
                    using (var stream = res.GetResponseStream())
                    {
                        //await stream.CopyToAsync(ms);//.Timeout(TimeSpan.FromMilliseconds(RWTimeout),"获取数据超时");
                        byte[] buffer = new byte[1024];
                        int count = 0;
                        while (0 < (count = await stream.ReadAsync(buffer, 0, buffer.Length)))
                        {
                            //int count = await stream.ReadAsync(buffer, 0, buffer.Length);
                            /*if (count == 0)
                            {
                                return;
                            }*/
                            await ms.WriteAsync(buffer, 0, count);
                        }
                        this.results = ms.ToArray();
                    }
                }

我Trace.writeline(代码现在删除了)。output中显示在ReadAsync后没有下文。我在button的click异步事件中调用的。只是不想开新线程那些。开发起来方便。我这里想知道的是为什么死在了readasync上边。。死在上边的时候界面并未卡死。像光进了黑洞。。。。反有任何回应。

支持(0) 反对(0) gnhao | 园豆:9 (初学一级) | 2013-07-16 11:36

不太明白你所说的没有意义是什么意思?程序构架或者说设计上的?还是说错误原因是来自于await http.get?

支持(0) 反对(0) gnhao | 园豆:9 (初学一级) | 2013-07-16 11:38

@gnhao: 噢,这个我理解错误了

支持(0) 反对(0) 天边彩云 | 园豆:629 (小虾三级) | 2013-07-16 12:57
清除回答草稿
   您需要登录以后才能回答,未注册用户请先注册