htmlparser 我的程序,能用的
时间:2010-12-07 来源:笨笨在成长
// Shows, one message box per match, the plain text of every <dl id="miaosha"> element in htmlcode.
// NOTE(review): htmlcode is not declared in this fragment; it must be supplied by the enclosing
// code (the original commented-out line suggested richTextBox1.Text) — confirm against the caller.

// Create the parser from the HTML string and the specified encoding.
Parser parser = Parser.CreateParser(htmlcode, "GBK");

// Select <dl> tags that also satisfy the ("id", "miaosha") attribute filter.
NodeFilter ftie = new AndFilter(new TagNameFilter("dl"), new HasAttributeFilter("id", "miaosha"));
NodeList nodelist = parser.Parse(ftie);

for (int i = 0; i < nodelist.Count; i++)
{
    // Re-parse the matched node's own HTML so the visitor walks only that fragment.
    // NOTE(review): the charset argument is an empty string, exactly as in the original —
    // confirm this is acceptable for an in-memory string source.
    Parser vparser = Parser.CreateParser(nodelist[i].ToHtml(), "");
    TextExtractingVisitor visitor = new TextExtractingVisitor();
    vparser.VisitAllNodesWith(visitor);

    // Display the text the visitor collected for this node.
    MessageBox.Show(visitor.ExtractedText);
}
}