// Keyword filtering algorithm
// Date: 2010-09-19  Source: 小.荷
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Collections;
namespace BLL.Common
{
#region 操作类
public class KeywordsFilter
{
    #region Keyword filtering
    /// <summary>
    /// Runs the given text through a freshly created <see cref="KeywordsFilterClass"/>.
    /// </summary>
    /// <remarks>
    /// The bad-word list handed to the filter is created empty here; populate it
    /// with the words that should be masked.
    /// </remarks>
    /// <param name="keywords">The text to filter.</param>
    /// <returns>The value returned by <c>KeywordsFilterClass.BadwordInKeywords</c>.</returns>
    public static string Filter(string keywords)
    {
        KeywordsFilterClass filter = new KeywordsFilterClass();
        // Collection of words that must be filtered out (currently empty).
        List<string> wordsToMask = new List<string>();
        return filter.BadwordInKeywords(keywords, wordsToMask);
    }
    #endregion
}
#endregion
#region 关键字过滤类
/// <summary>
/// Keyword filter: replaces every registered bad word found in a text with a fixed mask.
/// </summary>
/// <remarks>
/// Matching is ordinal and case-sensitive. Words registered by earlier calls on the
/// same instance stay registered, because the word set is only ever added to.
/// </remarks>
public class KeywordsFilterClass
{
    // Text written in place of every matched bad word.
    private const string Mask = "**";

    // Set of all registered bad words.
    private HashSet<string> hash = new HashSet<string>();
    // firstCharCheck[c] is true when at least one bad word starts with c.
    // Sized char.MaxValue + 1 so that '\uFFFF' is a valid index as well.
    private BitArray firstCharCheck = new BitArray(char.MaxValue + 1);
    // allCharCheck[c] is true when c occurs anywhere in at least one bad word.
    private BitArray allCharCheck = new BitArray(char.MaxValue + 1);
    // Length of the longest registered bad word; bounds the look-ahead while scanning.
    private int maxLength = 0;

    /// <summary>
    /// Registers the given bad words in the lookup structures.
    /// </summary>
    /// <param name="badwords">
    /// Words to register. A null list is ignored, and so are null or empty entries
    /// and words that are already registered.
    /// </param>
    private void InitHash(List<string> badwords)
    {
        if (badwords == null)
        {
            return;
        }

        foreach (string word in badwords)
        {
            // An empty word has no first character and could never match anything.
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            // Add returns false for words that are already registered.
            if (!hash.Add(word))
            {
                continue;
            }

            this.maxLength = Math.Max(this.maxLength, word.Length);
            firstCharCheck[word[0]] = true;
            foreach (char c in word)
            {
                allCharCheck[c] = true;
            }
        }
    }

    /// <summary>
    /// Replaces every bad word occurring in <paramref name="text"/> with "**".
    /// </summary>
    /// <remarks>
    /// The text is scanned once from left to right. At each position the shortest
    /// registered word starting there wins, and scanning resumes right after the
    /// matched word, so already written mask characters are never re-examined.
    /// </remarks>
    /// <param name="text">The text to filter; null or empty text is returned unchanged.</param>
    /// <param name="badwords">Bad words to register before filtering; may be null.</param>
    /// <returns>The filtered text.</returns>
    public string BadwordInKeywords(string text, List<string> badwords)
    {
        // Register the bad words first, even when there is nothing to scan.
        this.InitHash(badwords);

        if (string.IsNullOrEmpty(text) || maxLength == 0)
        {
            return text;
        }

        StringBuilder result = new StringBuilder(text.Length);
        int index = 0;
        while (index < text.Length)
        {
            // Cheap bit test first: most characters cannot start a bad word.
            int matched = firstCharCheck[text[index]] ? MatchLength(text, index) : 0;
            if (matched > 0)
            {
                result.Append(Mask);
                index += matched;
            }
            else
            {
                result.Append(text[index]);
                index++;
            }
        }

        return result.ToString();
    }

    // Returns the length of the shortest registered bad word that starts at
    // position start of text, or 0 when no registered word starts there.
    private int MatchLength(string text, int start)
    {
        int limit = Math.Min(maxLength, text.Length - start);
        for (int length = 1; length <= limit; length++)
        {
            // A character that occurs in no bad word rules out every longer candidate.
            if (!allCharCheck[text[start + length - 1]])
            {
                return 0;
            }

            if (hash.Contains(text.Substring(start, length)))
            {
                return length;
            }
        }

        return 0;
    }
}
#endregion
}
// Related reading: more +