HTML 中的 Table 导入 DataSet(提供单元格拆分功能)
时间:2011-05-24 来源:Stangray
/// <summary>
/// Imports every TABLE element of an HTML document into its own <see cref="DataTable"/>
/// and returns them together in a <see cref="DataSet"/>.
/// Merged cells are split: the text of a cell with a ROWSPAN is repeated in every row it
/// spans, and the extra columns covered by a COLSPAN are left blank in the cell's own row
/// and marked with a single space in the rows below so later cells do not land on them.
/// Columns and rows that contain only blank values are removed before the table is added.
/// </summary>
/// <param name="htmlDocument">The document whose TABLE elements are imported.</param>
/// <returns>A data set with one data table per TABLE element, in enumeration order.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="htmlDocument"/> is null.</exception>
public static DataSet GetPrivateItemList(HtmlDocument htmlDocument)
{
    if (htmlDocument == null)
    {
        throw new System.ArgumentNullException("htmlDocument");
    }

    DataSet ds = new DataSet();
    HtmlElementCollection TableCollection = htmlDocument.GetElementsByTagName("TABLE");
    int tableIndex = 1;
    foreach (HtmlElement tableElement in TableCollection)
    {
        // NOTE(review): GetElementsByTagName presumably returns all descendants, so the rows
        // of a nested table would also be counted as rows of its outer table - confirm.
        HtmlElementCollection TrCollection = tableElement.GetElementsByTagName("TR");
        int rows = TrCollection.Count;

        // Size the grid from the spans, not from the raw TD count: a COLSPAN or a ROWSPAN
        // carried in from a row above makes a row wider than its number of TD elements.
        int columns = CountGridColumns(TrCollection);

        // Create the empty mapping table that the cells are laid out on.
        DataTable memDataTable = CreateRelationTable(columns, rows, tableIndex);
        int totalRows = memDataTable.Rows.Count;
        int totalColumns = memDataTable.Columns.Count;

        int rowIndex = 0;
        foreach (HtmlElement trElement in TrCollection)
        {
            if (rowIndex >= totalRows)
            {
                // Defensive: the mapping table has fewer rows than the HTML table.
                break;
            }

            HtmlElementCollection TdCollection = trElement.GetElementsByTagName("TD");
            int columnIndex = 0;
            foreach (HtmlElement td in TdCollection)
            {
                int rowspan = GetSpanOrDefault(td, "ROWSPAN");
                int colspan = GetSpanOrDefault(td, "COLSPAN");

                // Skip slots already taken by a span from a row above. The scan moves
                // forward from the cursor only, so a cell can never be placed to the
                // left of a cell that precedes it in the same row.
                while (columnIndex < totalColumns
                       && memDataTable.Rows[rowIndex][columnIndex].ToString() != string.Empty)
                {
                    columnIndex++;
                }

                if (columnIndex >= totalColumns)
                {
                    // Defensive: no free slot left in this row of the mapping table.
                    break;
                }

                // Clamp both spans to the grid so oversized attributes cannot index
                // past the last row or column.
                int rowsAvailable = totalRows - rowIndex;
                int spanRows = rowspan < rowsAvailable ? rowspan : rowsAvailable;
                int columnsAvailable = totalColumns - columnIndex;
                int spanColumns = colspan < columnsAvailable ? colspan : columnsAvailable;

                // Row span: repeat the cell text in every spanned row.
                for (int r = 0; r < spanRows; r++)
                {
                    memDataTable.Rows[rowIndex + r][columnIndex] = td.InnerText;
                }

                // Column span combined with row span: reserve the extra columns in the
                // rows below with a single space. Each column starts again from the
                // first row below the cell.
                for (int c = 1; c < spanColumns; c++)
                {
                    for (int r = 1; r < spanRows; r++)
                    {
                        memDataTable.Rows[rowIndex + r][columnIndex + c] = " ";
                    }
                }

                columnIndex += spanColumns;
            }
            rowIndex++;
        }

        // Remove columns that hold only blank values (iterate backwards so removal
        // does not shift the indexes still to be visited).
        for (int i = memDataTable.Columns.Count - 1; i >= 0; i--)
        {
            bool candelete = true;
            for (int j = 0; j < memDataTable.Rows.Count; j++)
            {
                if (memDataTable.Rows[j][i].ToString().Trim().Length > 0)
                {
                    candelete = false;
                    break;
                }
            }
            if (candelete)
            {
                memDataTable.Columns.RemoveAt(i);
            }
        }

        // Remove rows that hold only blank values.
        for (int i = memDataTable.Rows.Count - 1; i >= 0; i--)
        {
            bool candelete = true;
            for (int j = 0; j < memDataTable.Columns.Count; j++)
            {
                if (memDataTable.Rows[i][j].ToString().Trim().Length > 0)
                {
                    candelete = false;
                    break;
                }
            }
            if (candelete)
            {
                memDataTable.Rows.RemoveAt(i);
            }
        }

        ds.Tables.Add(memDataTable);
        tableIndex++;
    }
    return ds;
}

/// <summary>
/// Reads a span attribute (ROWSPAN or COLSPAN) from a cell, returning 1 when the
/// attribute is missing, not an integer, or less than 1.
/// </summary>
private static int GetSpanOrDefault(HtmlElement cell, string attributeName)
{
    int span;
    bool parsed = int.TryParse(
        cell.GetAttribute(attributeName),
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out span);
    return parsed && span > 0 ? span : 1;
}

/// <summary>
/// Returns an upper bound on the number of grid columns the given rows need: for each row,
/// the sum of its cells' column spans plus the column spans carried in by row spans from
/// rows above; the result is the largest such sum.
/// </summary>
private static int CountGridColumns(HtmlElementCollection trCollection)
{
    int rowCount = trCollection.Count;
    // carried[r] accumulates the width that cells from earlier rows occupy in row r.
    int[] carried = new int[rowCount];
    int widest = 0;
    int rowIndex = 0;
    foreach (HtmlElement trElement in trCollection)
    {
        if (rowIndex >= rowCount)
        {
            break;
        }

        int width = carried[rowIndex];
        foreach (HtmlElement td in trElement.GetElementsByTagName("TD"))
        {
            int rowspan = GetSpanOrDefault(td, "ROWSPAN");
            int colspan = GetSpanOrDefault(td, "COLSPAN");
            width += colspan;
            for (int below = rowIndex + 1; below < rowCount && below - rowIndex < rowspan; below++)
            {
                carried[below] += colspan;
            }
        }

        if (width > widest)
        {
            widest = width;
        }
        rowIndex++;
    }
    return widest;
}