public class Excel
{
/// <summary>
/// 将指定excel文件中读取第一张工作表的名称
/// </summary>
/// <param name="filepath">Excel的文件路径</param>
/// <returns>第一张工作表的名称</returns>
///
public static string GetSheetName(string filePath)
{
string sheetName = "";
FileStream tmpStream = File.OpenRead(filePath);
byte[] fileByte = new byte[tmpStream.Length];
tmpStream.Read(fileByte, 0, fileByte.Length);
tmpStream.Close();
//下面这段,定义这个tmByte数组的作用是什么?为什么这样定义
byte[] tmByte = new byte[]{Convert.ToByte(11),Convert.ToByte(0),Convert.ToByte(0),Convert.ToByte(0),Convert.ToByte(0)
,Convert.ToByte(0),Convert.ToByte(0),Convert.ToByte(0),Convert.ToByte(11),Convert.ToByte(0),Convert.ToByte(30),Convert.ToByte(16)
,Convert.ToByte(0),Convert.ToByte(0)};
int index = GetSheetIndex(fileByte, tmByte);
if (index > -1)
{
index += 16 + 12; //这里也不明白+16+12什么意思
System.Collections.ArrayList sheetNameList = new System.Collections.ArrayList();
for(int i=index;i<fileByte.Length-1;i++)
{
byte temp=fileByte[i];
if(temp!=Convert.ToByte(0))
sheetNameList.Add(temp);
else
break;
}
byte[] sheetNameByte=new byte [sheetNameList.Count];
for(int i=0;i<sheetNameList.Count;i++)
sheetNameByte[i]=Convert.ToByte(sheetNameList[i]);
sheetName=System.Text.Encoding.Default.GetString(sheetNameByte);
}
return sheetName;
}
/// <summary>
/// 只供方法GetSheetName()使用
/// </summary>
///
private static int GetSheetIndex(byte[] findTagrget, byte[] findItem)
{
int index = -1;
int findItemLength = findItem.Length;
if (findItemLength < 1)
return -1;
int findTargetLength = findTagrget.Length;
if ((findTargetLength - 1) < findItemLength)
return -1;
for (int i = findTargetLength - findItemLength - 1; i > -1; i--)
{
System.Collections.ArrayList tmpList = new System.Collections.ArrayList();
int find = 0;
for (int j = 0; j < findItemLength; j++)
{
if (findTagrget[i + j] == findItem[j])
find += 1;
}
if (find == findItemLength)
{
index = i;
break;
}
}
return index;
}
/// <summary>
/// 将指定html字符串的数据转换成datatable对象——根据“<tr><td>”等特殊字符进行处理
/// </summary>
/// <param name="tmphtml">Html字符串</param>
///
public static DataTable GetDataTableFromString(string tmphtml)
{
string tmpStr = tmphtml;
DataTable dt = new DataTable();
//先处理一下这个字符串,删除第一个<tr>之前和最后一个</tr>之后的部分
int index = tmpStr.IndexOf("<tr");
if (index > -1)
tmpStr = tmpStr.Substring(index);
else
return dt;
index = tmpStr.LastIndexOf("</td>");
if (index > -1)
tmpStr = tmpStr.Substring(0, index + 5);
else
return dt;
bool existsSparator = false;
char separator = Convert.ToChar("^");
//如果原字符串中包含分隔符"^"则先把它替换掉
if (tmpStr.IndexOf(separator.ToString()) > -1)
{
existsSparator = true;
tmpStr = tmpStr.Replace("^", "^$&^");
}
//先根据"</tr>"分拆
string[] tmpRow = tmpStr.Replace("</tr>", "^").Split(separator);
for (int i = 0; i<tmpRow.Length - 1; i++)
{
DataRow newRow = dt.NewRow();
string tmpStri = tmpRow[i];
if (tmpStri.IndexOf("<tr") > -1)
{
tmpStri = tmpStri.Substring(tmpStri.IndexOf("<tr"));
if (tmpStri.IndexOf("display:none") < 0 || tmpStri.IndexOf("display:none") > tmpStri.IndexOf(">"))
{
tmpStri = tmpStri.Replace("</td>", "^");
string[] tmpField = tmpStri.Split(separator);
for (int j = 0; j < tmpField.Length - 1; j++)
{
tmpField[j] = RemoveString(tmpField[j], "<font>"); ;
index = tmpField[j].LastIndexOf(">") + 1;
if (index>0)
{
string field = tmpField[j].Substring(index, tmpField[j].Length - index);
if (existsSparator)
field = field.Replace("^$&^", "^");
if (i == 0)
{
string tmpFieldName = field;
int sn = 1;
while (dt.Columns.Contains(tmpFieldName))
{
tmpFieldName = field + sn.ToString();
sn += 1;
}
dt.Columns.Add(tmpFieldName);
}
else
{
newRow[j] = field;
}
}//end of if(index>0)
}
}
if (i > 0)
dt.Rows.Add(newRow);
}
}
dt.AcceptChanges();
return dt;
}
/// <summary>
/// 从指定的html字符串中剔除指定的对象
/// </summary>
/// <param name="tmpHtml">Html字符串</param>
/// <param name="remove">需要剔除的对象——例如输入"<font>"则剔除"<font???????>"和"</font>>"</param>
/// <retuns></retuns>
///
public static string RemoveString(string tmpHtml, string remove)
{
tmpHtml = tmpHtml.Replace(remove.Replace("<", "</"), "");
tmpHtml = RemoveStringHead(tmpHtml, remove); ;
return tmpHtml;
}
/// <summary>
/// 只供方法RemoveString()使用
/// </summary>
/// <param name="tmpHtml"></param>
/// <param name="remove"></param>
/// <returns></returns>
private static string RemoveStringHead(string tmpHtml, string remove)
{
//为了方便注释,假设输入参数remove="<font>"
if (remove.Length < 1)
return tmpHtml; //参数remove为空:不处理返回
if((remove.Substring(0,1)!="<"||(remove.Substring(remove.Length-1)!=">")))
return tmpHtml; //参数remove不是<????>:不处理返回
int indexs=tmpHtml.IndexOf(remove.Replace(">","")); //查找"<font"的位置
int indexe=-1;
if(indexs>-1)
{
string tmpRight=tmpHtml.Substring(indexs,tmpHtml.Length-indexs);
indexe=tmpRight.IndexOf(">");
if(indexe>-1)
tmpHtml=tmpHtml.Substring(0,indexs)+tmpHtml.Substring(indexs+ indexe+1);
if(tmpHtml.IndexOf(remove.Replace(">",""))>-1)
tmpHtml=RemoveStringHead(tmpHtml,remove);
}
return tmpHtml;
}
整体的功能如何去实现的呢?很多都不是很明白。
楼主贴这么代码很少有人会细心的看,建议楼主可以整理下自己具体哪几个点有问题,多做些描述这样会有更多的人来帮助。
前面两个函数是按照二进制格式解析excel文件,后面三个函数是用来将html文本转换成datatable.
为什么是定义tmpbyte,为什么是16+12?因为excel文件格式就是那么定的.
这两种方式,我都不推荐.