我已经试过很多种方式来判断网页的编码,但效果都不太好。在不知道网页编码方式的情况下,请问怎样才能正常下载网页内容,而不出现乱码呢?请大家指教。
#region Fetch web page HTML text
/// <summary>
/// Matches a <c>charset=NAME</c> declaration, either in an HTTP Content-Type header
/// or in the markup of a document. Group 1 is the charset name.
/// </summary>
private static readonly Regex ResponseCharsetPattern =
    new Regex(@"charset\s*=\s*[""']?\s*([A-Za-z0-9_\-.:]+)", RegexOptions.IgnoreCase);

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body decoded to text.
/// </summary>
/// <remarks>
/// The body is read into memory exactly once and decoded afterwards, because a network
/// response stream cannot be rewound and read a second time. The encoding is chosen in
/// this order: the charset in the HTTP Content-Type header, a charset declared in the
/// first bytes of the document (for example in a meta tag), <paramref name="codeType"/>,
/// and finally <see cref="Encoding.Default"/>. A byte order mark, when present,
/// takes precedence over all of these.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <param name="codeType">
/// Optional fallback encoding name (for example "gb2312"). It is used only when neither
/// the response header nor the document declares a usable charset. May be null or empty.
/// </param>
/// <returns>
/// The decoded text; <c>null</c> when the response status is not 200 OK; or the exception
/// message when the download fails (this last behavior is kept for existing callers).
/// </returns>
public static String GetResponseText(string url, string codeType)
{
    try
    {
        WebRequest request = WebRequest.Create(url);
        request.Credentials = CredentialCache.DefaultCredentials;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // Compare the numeric status; the reason phrase is free-form text.
            if (response.StatusCode != HttpStatusCode.OK)
            {
                return null;
            }

            byte[] body;
            using (Stream dataStream = response.GetResponseStream())
            {
                body = ReadResponseBytes(dataStream);
            }

            Encoding encoding = DetectResponseEncoding(response.ContentType, body, codeType);

            // detectEncodingFromByteOrderMarks = true: a BOM overrides the guess and is
            // stripped from the returned text.
            using (MemoryStream buffered = new MemoryStream(body))
            using (StreamReader reader = new StreamReader(buffered, encoding, true))
            {
                return reader.ReadToEnd();
            }
        }
    }
    catch (Exception ex)
    {
        // Existing contract: failures are reported through the return value.
        return ex.Message;
    }
}

/// <summary>Copies everything remaining in <paramref name="source"/> into a byte array.</summary>
private static byte[] ReadResponseBytes(Stream source)
{
    if (source == null)
    {
        return new byte[0];
    }

    using (MemoryStream collected = new MemoryStream())
    {
        byte[] chunk = new byte[8192];
        int read;
        while ((read = source.Read(chunk, 0, chunk.Length)) > 0)
        {
            collected.Write(chunk, 0, read);
        }
        return collected.ToArray();
    }
}

/// <summary>
/// Picks the encoding for a downloaded body: header charset, then in-document charset,
/// then the caller's fallback name, then the system default.
/// </summary>
private static Encoding DetectResponseEncoding(string contentType, byte[] body, string fallbackName)
{
    Encoding found = TryGetEncodingByName(ExtractCharsetName(contentType));
    if (found != null)
    {
        return found;
    }

    // Charset declarations are plain ASCII, so an ASCII view of the first few KB is
    // enough to locate one regardless of the real encoding of the page.
    string head = Encoding.ASCII.GetString(body, 0, Math.Min(body.Length, 4096));
    found = TryGetEncodingByName(ExtractCharsetName(head));
    if (found != null)
    {
        return found;
    }

    return TryGetEncodingByName(fallbackName) ?? Encoding.Default;
}

/// <summary>Returns the charset name declared in <paramref name="text"/>, or null if none.</summary>
private static string ExtractCharsetName(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    Match match = ResponseCharsetPattern.Match(text);
    return match.Success ? match.Groups[1].Value : null;
}

/// <summary>Resolves an encoding by name; returns null for a missing or unknown name.</summary>
private static Encoding TryGetEncodingByName(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(name.Trim());
    }
    catch (ArgumentException)
    {
        // Unknown or unsupported charset name: let the caller try the next source.
        return null;
    }
}
#endregion
/// <summary>
/// Downloads the page at <paramref name="url"/>, saves it as a UTF-8 file at the
/// server path mapped from <paramref name="urlHTML"/>, then redirects the current
/// response to <paramref name="urlHTML"/>.
/// </summary>
/// <remarks>
/// The source encoding is taken from the charset in the HTTP Content-Type header,
/// otherwise from a charset declared in the first bytes of the document, otherwise
/// <see cref="System.Text.Encoding.Default"/>. A byte order mark, when present,
/// takes precedence.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <param name="urlHTML">Virtual path of the HTML file to create and redirect to.</param>
public void NewCreateStaticPage(string url, string urlHTML)
{
    byte[] buffer;
    string contentType;
    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
        buffer = wc.DownloadData(url);
        contentType = wc.ResponseHeaders == null ? null : wc.ResponseHeaders["Content-Type"];
    }

    // Charset declarations are plain ASCII, so an ASCII view of the first few KB is
    // enough to find one regardless of the page's real encoding.
    string head = System.Text.Encoding.ASCII.GetString(buffer, 0, System.Math.Min(buffer.Length, 4096));

    System.Text.Encoding pageEncoding = System.Text.Encoding.Default;
    foreach (string candidate in new string[] { contentType, head })
    {
        if (string.IsNullOrEmpty(candidate))
        {
            continue;
        }

        System.Text.RegularExpressions.Match declared = System.Text.RegularExpressions.Regex.Match(
            candidate,
            @"charset\s*=\s*[""']?\s*([A-Za-z0-9_\-.:]+)",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        if (!declared.Success)
        {
            continue;
        }

        try
        {
            pageEncoding = System.Text.Encoding.GetEncoding(declared.Groups[1].Value);
            break;
        }
        catch (System.ArgumentException)
        {
            // Unknown charset name: fall through to the next source or the default.
        }
    }

    // Decode through a StreamReader so a byte order mark is honored and stripped.
    string file;
    using (MemoryStream raw = new MemoryStream(buffer))
    using (StreamReader decoder = new StreamReader(raw, pageEncoding, true))
    {
        file = decoder.ReadToEnd();
    }

    string pathHTML = HttpContext.Current.Server.MapPath(urlHTML);
    using (FileStream fs = new FileStream(pathHTML, FileMode.Create))
    using (StreamWriter sw = new StreamWriter(fs, System.Text.Encoding.UTF8))
    {
        // Disposing the writer flushes its buffer into the file before the stream closes.
        sw.Write(file);
    }

    HttpContext.Current.Response.Redirect(urlHTML);
}
试试这段代码:先把网页下载为 byte 数组,然后再转换成字符串写入 HTML 文件。