过滤html 2...
时间:2010-08-13 来源:feizhiqiang
/**
* filter all html element.
* For example:< a href="www.sohu.com/test">hello!< /a>
* The filter result is :hello!
* Notice:This method filter the text between "< " and ">"
* @param element
* @return
*/
public static String getTxtWithoutHTMLElement (String element)
{
// String reg="< [^< |^>]+>";
// return element.replaceAll(reg,"");
if(null==element||"".equals(element.trim()))
{
return element;
}
Pattern pattern=Pattern.compile("< [^< |^>]*>");
Matcher matcher=pattern.matcher(element);
StringBuffer txt=new StringBuffer();
while(matcher.find())
{
String group=matcher.group();
if(group.matches("< [\\s]*>"))
{
matcher.appendReplacement(txt,group);
}
else
{
matcher.appendReplacement(txt,"");
}
}
matcher.appendTail(txt);
repaceEntities(txt,"&","&");
repaceEntities(txt,"< ","< ");
repaceEntities(txt,">",">");
repaceEntities(txt,""","\"");
repaceEntities(txt," ","");
return txt.toString();
}
下面是测试用例:
/**
 * Checks that ExcelHssfView.getTxtWithoutHTMLElement removes tags, keeps stray or
 * empty angle brackets, and decodes entities.
 *
 * NOTE(review): the inputs appear to have lost their entity encoding when the
 * snippet was published. The second bracket case and the last case are restored
 * to entity-encoded input here; confirm against the original test source.
 */
public void testGetTxtWithoutHTMLElement ()
{
    // Ordinary tags, closed or unclosed, are removed and their text is kept.
    assertEquals("test",ExcelHssfView.getTxtWithoutHTMLElement("< a href='a/test'>test< /a>"));
    assertEquals("test",ExcelHssfView.getTxtWithoutHTMLElement("< a href='a/test'>test"));
    assertEquals("test",ExcelHssfView.getTxtWithoutHTMLElement("< input type='text'>test< /input>"));
    assertEquals("test",ExcelHssfView.getTxtWithoutHTMLElement("< p>test"));
    assertEquals("test",ExcelHssfView.getTxtWithoutHTMLElement("< table>< tr>< td>test< /td>< /tr>< /table>"));

    // A lone '<' or '>' inside the text is not a tag and must survive.
    assertEquals("te< st",ExcelHssfView.getTxtWithoutHTMLElement("< p>te< st"));
    assertEquals("te>st",ExcelHssfView.getTxtWithoutHTMLElement("< p>te>st"));
    assertEquals("tst",ExcelHssfView.getTxtWithoutHTMLElement("< p>t< e>st"));
    assertEquals("t< st",ExcelHssfView.getTxtWithoutHTMLElement("< p>t< < e>st"));

    // Bracket pairs holding only whitespace are kept verbatim.
    assertEquals("< >test",ExcelHssfView.getTxtWithoutHTMLElement("< p>< >test"));
    // Entity-encoded brackets are decoded after the tags are stripped.
    assertEquals("<>test",ExcelHssfView.getTxtWithoutHTMLElement("< p>&lt;&gt;test"));
    assertEquals("< < >test",ExcelHssfView.getTxtWithoutHTMLElement("< p>< < >test"));

    // A non-breaking-space entity is dropped from the result.
    assertEquals("test",ExcelHssfView.getTxtWithoutHTMLElement("< table>< tr>< td>&nbsp;test< /td>< /tr>< /table>"));
}