htmlparser 分析网页:判断是否相同..
时间:2010-10-22 来源:slddyb
package test;
import org.htmlparser.*;
import org.htmlparser.filters.*;
import org.htmlparser.http.ConnectionManager;
import org.htmlparser.lexer.*;
import org.htmlparser.parserapplications.filterbuilder.Filter;
import org.htmlparser.util.*;
/**
 * Small HTMLParser demo: loads a remote page and a local HTML file, extracts
 * every {@code <div>} that has a direct {@code <font>} child from each, prints
 * both extractions, and reports whether they are identical.
 *
 * <p>NOTE(review): the local file is presumably a saved copy of the remote
 * page - confirm against how the file was produced.
 */
public class FilterTest {

    /** Address of the live page to analyse. */
    private static final String REMOTE_URL = "http://www.cc98.org/list.asp?boardid=15";

    /** Location of the local HTML file the live page is compared against. */
    private static final String LOCAL_FILE = "F://htmls/98.4.2.html";

    /** Character encoding forced on both parsers. */
    private static final String ENCODING = "utf-8";

    /** Line printed between the two dumps. */
    private static final String SEPARATOR = "------------------------------------------------";

    /**
     * Runs the comparison and writes the result to standard output.
     *
     * <p>Prints the nodes extracted from the local file, a separator line, the
     * nodes extracted from the remote page, and finally {@code same!} when both
     * extractions serialise to the same HTML. Any failure is reported as a stack
     * trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            ConnectionManager manager = Page.getConnectionManager();

            Parser remoteParser = new Parser(manager.openConnection(REMOTE_URL));
            Parser localParser = new Parser(manager.openConnection(LOCAL_FILE));
            localParser.setEncoding(ENCODING);
            remoteParser.setEncoding(ENCODING);

            // One filter is shared by both parsers so the two extractions are
            // comparable. Previously the local file was filtered with
            // StringFilter("font"), which matches text nodes containing the
            // word "font" rather than <font> tags.
            NodeFilter divWithFontChild = new AndFilter(
                    new TagNameFilter("div"),
                    new HasChildFilter(new TagNameFilter("font")));

            NodeList remoteNodes = remoteParser.parse(divWithFontChild);
            NodeList localNodes = localParser.parse(divWithFontChild);

            printNodes(localNodes);
            System.out.println(SEPARATOR);
            printNodes(remoteNodes);

            // Compare the serialised HTML, i.e. exactly what was printed above.
            // NOTE(review): NodeList.toString() appears to be a debug dump that
            // embeds source positions - verify against the library docs.
            if (localNodes.toHtml().equals(remoteNodes.toHtml())) {
                System.out.println("same!");
            }
        } catch (Exception e) {
            // Top-level boundary of a demo program: report and exit.
            e.printStackTrace();
        }
    }

    /** Prints the HTML of every node in {@code nodes}, one node per line. */
    private static void printNodes(NodeList nodes) {
        for (int i = 0; i < nodes.size(); i++) {
            System.out.println(nodes.elementAt(i).toHtml());
        }
    }
}
import org.htmlparser.*;
import org.htmlparser.filters.*;
import org.htmlparser.http.ConnectionManager;
import org.htmlparser.lexer.*;
import org.htmlparser.parserapplications.filterbuilder.Filter;
import org.htmlparser.util.*;
/**
 * Small HTMLParser demo: loads a remote page and a local HTML file, extracts
 * every {@code <div>} that has a direct {@code <font>} child from each, prints
 * both extractions, and reports whether they are identical.
 *
 * <p>NOTE(review): the local file is presumably a saved copy of the remote
 * page - confirm against how the file was produced.
 */
public class FilterTest {

    /** Address of the live page to analyse. */
    private static final String REMOTE_URL = "http://www.cc98.org/list.asp?boardid=15";

    /** Location of the local HTML file the live page is compared against. */
    private static final String LOCAL_FILE = "F://htmls/98.4.2.html";

    /** Character encoding forced on both parsers. */
    private static final String ENCODING = "utf-8";

    /** Line printed between the two dumps. */
    private static final String SEPARATOR = "------------------------------------------------";

    /**
     * Runs the comparison and writes the result to standard output.
     *
     * <p>Prints the nodes extracted from the local file, a separator line, the
     * nodes extracted from the remote page, and finally {@code same!} when both
     * extractions serialise to the same HTML. Any failure is reported as a stack
     * trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            ConnectionManager manager = Page.getConnectionManager();

            Parser remoteParser = new Parser(manager.openConnection(REMOTE_URL));
            Parser localParser = new Parser(manager.openConnection(LOCAL_FILE));
            localParser.setEncoding(ENCODING);
            remoteParser.setEncoding(ENCODING);

            // One filter is shared by both parsers so the two extractions are
            // comparable. Previously the local file was filtered with
            // StringFilter("font"), which matches text nodes containing the
            // word "font" rather than <font> tags.
            NodeFilter divWithFontChild = new AndFilter(
                    new TagNameFilter("div"),
                    new HasChildFilter(new TagNameFilter("font")));

            NodeList remoteNodes = remoteParser.parse(divWithFontChild);
            NodeList localNodes = localParser.parse(divWithFontChild);

            printNodes(localNodes);
            System.out.println(SEPARATOR);
            printNodes(remoteNodes);

            // Compare the serialised HTML, i.e. exactly what was printed above.
            // NOTE(review): NodeList.toString() appears to be a debug dump that
            // embeds source positions - verify against the library docs.
            if (localNodes.toHtml().equals(remoteNodes.toHtml())) {
                System.out.println("same!");
            }
        } catch (Exception e) {
            // Top-level boundary of a demo program: report and exit.
            e.printStackTrace();
        }
    }

    /** Prints the HTML of every node in {@code nodes}, one node per line. */
    private static void printNodes(NodeList nodes) {
        for (int i = 0; i < nodes.size(); i++) {
            System.out.println(nodes.elementAt(i).toHtml());
        }
    }
}
相关阅读 更多 +