使用前缀树(Trie)匹配的方法实现的敏感词检测工具类。原始代码出自 https://blog.csdn.net/chenssy/article/details/26961957 。

我在原始代码的基础上做了一些修改,主要是去掉了 @SuppressWarnings({ "rawtypes", "unchecked" }) 注解(改用泛型来消除相应警告),并修改了最大匹配检测方式的实现。以下是完整代码:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Sensitive-word detector backed by a character trie (prefix tree). <br>
 * Adapted from https://blog.csdn.net/chenssy/article/details/26961957
 * <p>
 * The dictionary is stored as nested maps: each level maps one character to a
 * {@link WordCharMapNode}, which records whether a word ends on that character
 * and holds the map for the following character. Text is scanned left to
 * right; at every position the trie is walked to find a dictionary word that
 * starts there.
 * <p>
 * This is a singleton with mutable state (dictionary and default match type);
 * the class performs no synchronization of its own.
 */
public enum SensitiveWordUtil {
	INSTANCE;

	/** Match rule: stop at the shortest dictionary word starting at a position. */
	public static final int minMatchType = 1;
	/** Match rule: take the longest dictionary word starting at a position. */
	public static final int maxMatchType = 2;

	/**
	 * Trie root (first character of each word to its node). Starts out empty
	 * rather than null so the utility can be used before {@link #init} is called.
	 */
	private Map<Character, WordCharMapNode> sensitiveWordMap = new HashMap<>();

	/** Match rule used by the overloads that take no explicit match type. */
	private int matchType = maxMatchType;

	/**
	 * @return the singleton instance
	 */
	public static SensitiveWordUtil getInstance() {
		return INSTANCE;
	}

	/**
	 * Replaces the current dictionary with the given words.
	 * Each word is trimmed; {@code null} and blank entries are skipped.
	 *
	 * @param words the dictionary words; {@code null} yields an empty dictionary
	 */
	public void init(String[] words) {
		if (words == null) {
			sensitiveWordMap = new HashMap<>();
			return;
		}
		// Pre-size the root map to cut down on rehashing while loading.
		sensitiveWordMap = new HashMap<>(words.length);
		for (String keyword : words) {
			if (keyword == null) {
				continue;
			}
			String trimmed = keyword.trim();
			if (!trimmed.isEmpty()) {
				addSensitiveWord(trimmed);
			}
		}
	}

	/**
	 * Sets the default match rule.
	 *
	 * @param matchType {@link #maxMatchType} for longest match; any other value
	 *                  is treated as shortest match
	 */
	public void setMatchType(int matchType) {
		this.matchType = matchType;
	}

	/**
	 * @return the default match rule
	 */
	public int getMatchType() {
		return matchType;
	}

	/** Restores the default match rule to {@link #maxMatchType}. */
	public void resetMatchType() {
		matchType = maxMatchType;
	}

	/**
	 * Adds one word to the dictionary. The word is inserted as given (it is not
	 * trimmed here); {@code null} and empty words are ignored.
	 *
	 * @param keyword the word to add
	 */
	public void addSensitiveWord(String keyword) {
		if (keyword == null) {
			return;
		}
		Map<Character, WordCharMapNode> level = sensitiveWordMap;
		int lastIndex = keyword.length() - 1;
		for (int i = 0; i <= lastIndex; i++) {
			char keyChar = keyword.charAt(i);
			WordCharMapNode node = level.get(keyChar);
			if (node == null) {
				node = new WordCharMapNode(false, null);
				level.put(keyChar, node);
			}
			if (i == lastIndex) {
				node.setEnd(true);
			} else {
				// Child maps are created lazily, only when a longer word needs them.
				if (node.getNextNodeMap() == null) {
					node.setNextNodeMap(new HashMap<>());
				}
				level = node.getNextNodeMap();
			}
		}
	}

	/**
	 * Tells whether the text contains a dictionary word, using the default match rule.
	 *
	 * @param text the text to scan; {@code null} is treated as containing nothing
	 * @return {@code true} if at least one dictionary word occurs in the text
	 */
	public boolean isContainSensitiveWord(String text) {
		return isContainSensitiveWord(text, matchType);
	}

	/**
	 * Tells whether the text contains a dictionary word.
	 *
	 * @param text      the text to scan; {@code null} is treated as containing nothing
	 * @param matchType {@link #maxMatchType} or {@link #minMatchType}
	 * @return {@code true} if at least one dictionary word occurs in the text
	 */
	public boolean isContainSensitiveWord(String text, int matchType) {
		if (text == null) {
			return false;
		}
		for (int i = 0; i < text.length(); i++) {
			if (checkSensitiveWord(text, i, matchType) > 0) {
				return true; // the first hit settles the answer
			}
		}
		return false;
	}

	/**
	 * Walks the trie from {@code beginIndex} and returns the length of the
	 * dictionary word that starts there.
	 *
	 * @param text       the text being scanned (not null)
	 * @param beginIndex index of the first character to match
	 * @param matchType  {@link #maxMatchType} keeps walking to find the longest
	 *                   word; any other value stops at the first (shortest) word
	 * @return number of characters matched, or 0 if no word starts at {@code beginIndex}
	 */
	private int checkSensitiveWord(String text, int beginIndex, int matchType) {
		boolean longest = matchType == maxMatchType;
		int matchedLength = 0;
		Map<Character, WordCharMapNode> level = sensitiveWordMap;
		// level == null means the previous node had no children.
		for (int i = beginIndex; i < text.length() && level != null; i++) {
			WordCharMapNode node = level.get(text.charAt(i));
			if (node == null) {
				break;
			}
			if (node.isEnd()) {
				// Remember the last complete word so that a longer walk that
				// ends in a dead end still reports the word already found.
				matchedLength = i - beginIndex + 1;
				if (!longest) {
					break;
				}
			}
			level = node.getNextNodeMap();
		}
		return matchedLength;
	}

	/**
	 * Collects the distinct dictionary words found in the text, using the default match rule.
	 *
	 * @param text the text to scan; {@code null} yields an empty set
	 * @return the set of matched words (never null)
	 */
	public Set<String> getSensitiveWord(String text) {
		return getSensitiveWord(text, matchType);
	}

	/**
	 * Collects the distinct dictionary words found in the text. After a match the
	 * scan resumes right behind the matched word, so matches never overlap.
	 *
	 * @param text      the text to scan; {@code null} yields an empty set
	 * @param matchType {@link #maxMatchType} or {@link #minMatchType}
	 * @return the set of matched words (never null)
	 */
	public Set<String> getSensitiveWord(String text, int matchType) {
		Set<String> found = new HashSet<>();
		if (text == null) {
			return found;
		}
		int i = 0;
		while (i < text.length()) {
			int wordLength = checkSensitiveWord(text, i, matchType);
			if (wordLength > 0) {
				found.add(text.substring(i, i + wordLength));
				i += wordLength;
			} else {
				i++;
			}
		}
		return found;
	}

	/**
	 * Masks every dictionary word in the text, using the default match rule.
	 *
	 * @param txt         the text to filter; {@code null} is returned unchanged
	 * @param replaceChar mask string emitted once per masked character
	 * @return the filtered text
	 */
	public String replaceSensitiveWord(String txt, String replaceChar) {
		return replaceSensitiveWord(txt, matchType, replaceChar);
	}

	/**
	 * Masks every dictionary word in the text. The text is scanned once, in the
	 * same way as {@link #getSensitiveWord(String, int)}, and each matched
	 * character is replaced in place by {@code replaceChar}. Words and the mask
	 * are handled as literal text, never as regular expressions, and the result
	 * does not depend on the order in which words are stored.
	 *
	 * @param txt         the text to filter; {@code null} is returned unchanged
	 * @param matchType   {@link #maxMatchType} or {@link #minMatchType}
	 * @param replaceChar mask string emitted once per masked character
	 * @return the filtered text
	 */
	public String replaceSensitiveWord(String txt, int matchType, String replaceChar) {
		if (txt == null || txt.isEmpty()) {
			return txt;
		}
		StringBuilder result = new StringBuilder(txt.length());
		int i = 0;
		while (i < txt.length()) {
			int wordLength = checkSensitiveWord(txt, i, matchType);
			if (wordLength > 0) {
				appendReplaceChars(result, replaceChar, wordLength);
				i += wordLength;
			} else {
				result.append(txt.charAt(i));
				i++;
			}
		}
		return result.toString();
	}

	/** Appends {@code replaceChar} to {@code out} {@code count} times. */
	private void appendReplaceChars(StringBuilder out, String replaceChar, int count) {
		for (int i = 0; i < count; i++) {
			out.append(replaceChar);
		}
	}

}

/**
 * One trie node: records whether a dictionary word ends on this character and
 * holds the map leading to the characters that may follow it.
 */
class WordCharMapNode {
	/** True if a dictionary word ends on this node. */
	private boolean isEnd;
	/** Children keyed by the next character; null while the node has no children. */
	private Map<Character, WordCharMapNode> nextNodeMap;

	/**
	 * @param isEnd       whether a dictionary word ends on this node
	 * @param nextNodeMap children keyed by the next character, or null for none
	 */
	public WordCharMapNode(boolean isEnd, Map<Character, WordCharMapNode> nextNodeMap) {
		super();
		this.isEnd = isEnd;
		this.nextNodeMap = nextNodeMap;
	}

	/**
	 * @return whether a dictionary word ends on this node
	 */
	public boolean isEnd() {
		return isEnd;
	}

	/**
	 * @param isEnd whether a dictionary word ends on this node
	 */
	public void setEnd(boolean isEnd) {
		this.isEnd = isEnd;
	}

	/**
	 * @param nextNodeMap children keyed by the next character, or null for none
	 */
	public void setNextNodeMap(Map<Character, WordCharMapNode> nextNodeMap) {
		this.nextNodeMap = nextNodeMap;
	}

	/**
	 * @return children keyed by the next character, or null if there are none
	 */
	public Map<Character, WordCharMapNode> getNextNodeMap() {
		return nextNodeMap;
	}

}