import java
.util
.HashSet
;
import java
.util
.Iterator
;
import java
.util
.Map
;
import java
.util
.Set
;
/**
 * Detects, extracts and masks sensitive words in a text.
 *
 * <p>The word dictionary is obtained from {@code SensitiveWordInit.initKeyWord()} and is a
 * nested map: each node maps a {@code Character} to the child node for that character and
 * carries an {@code "isEnd"} entry whose value is {@code "1"} when a dictionary word ends at
 * that node.
 *
 * <p>Only words of at least two characters are ever reported (see
 * {@link #CheckSensitiveWord(String, int, int)}).
 */
public class SensitivewordFilter {

    /** Root of the dictionary; never {@code null} after construction. */
    private Map<?, ?> sensitiveWordMap = null;

    /** Match type: stop at the shortest dictionary word starting at a position. */
    public static int minMatchTYpe = 1;

    /** Match type: keep going and report the longest dictionary word starting at a position. */
    public static int maxMatchType = 2;

    /**
     * Loads the dictionary through {@code SensitiveWordInit}. If the loader hands back
     * {@code null}, an empty dictionary is used so that every lookup simply finds nothing
     * instead of failing with a {@code NullPointerException}.
     */
    public SensitivewordFilter() {
        Map<?, ?> loaded = new SensitiveWordInit().initKeyWord();
        if (loaded == null) {
            loaded = java.util.Collections.emptyMap();
        }
        sensitiveWordMap = loaded;
    }

    /**
     * Tells whether the text contains at least one sensitive word.
     *
     * @param txt       text to inspect; {@code null} is treated as containing nothing
     * @param matchType {@link #minMatchTYpe} or {@link #maxMatchType}
     * @return {@code true} as soon as a word is found at any position
     */
    public boolean isContaintSensitiveWord(String txt, int matchType) {
        if (txt == null) {
            return false;
        }
        for (int start = 0; start < txt.length(); start++) {
            if (CheckSensitiveWord(txt, start, matchType) > 0) {
                // One hit is enough; no need to scan the rest of the text.
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the distinct sensitive words found while scanning the text left to right.
     * After a hit the scan resumes right behind the matched word, so matches never overlap.
     *
     * @param txt       text to inspect; {@code null} yields an empty set
     * @param matchType {@link #minMatchTYpe} or {@link #maxMatchType}
     * @return the set of matched words (possibly empty, never {@code null})
     */
    public Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> found = new HashSet<String>();
        if (txt == null) {
            return found;
        }
        int start = 0;
        while (start < txt.length()) {
            int length = CheckSensitiveWord(txt, start, matchType);
            if (length > 0) {
                found.add(txt.substring(start, start + length));
                start += length;
            } else {
                start++;
            }
        }
        return found;
    }

    /**
     * Returns a copy of the text in which every matched sensitive word is masked by repeating
     * {@code replaceChar} once per character of the word.
     *
     * <p>Masking is done at the positions where the scan actually matched (the same
     * left-to-right, non-overlapping scan as {@link #getSensitiveWord(String, int)}), using
     * plain text substitution. Words and the replacement are therefore never interpreted as
     * regular expressions, and the result does not depend on set iteration order.
     *
     * @param txt         text to mask; {@code null} is returned unchanged
     * @param matchType   {@link #minMatchTYpe} or {@link #maxMatchType}
     * @param replaceChar string written once for every masked character
     * @return the masked text
     */
    public String replaceSensitiveWord(String txt, int matchType, String replaceChar) {
        if (txt == null || txt.length() == 0) {
            return txt;
        }
        StringBuilder masked = new StringBuilder(txt.length());
        int start = 0;
        while (start < txt.length()) {
            int length = CheckSensitiveWord(txt, start, matchType);
            if (length > 0) {
                masked.append(getReplaceChars(replaceChar, length));
                start += length;
            } else {
                masked.append(txt.charAt(start));
                start++;
            }
        }
        return masked.toString();
    }

    /**
     * Repeats {@code replaceChar}; the result always contains it at least once, even when
     * {@code length} is below 1.
     */
    private String getReplaceChars(String replaceChar, int length) {
        StringBuilder repeated = new StringBuilder();
        repeated.append(replaceChar);
        for (int i = 1; i < length; i++) {
            repeated.append(replaceChar);
        }
        return repeated.toString();
    }

    /**
     * Checks whether a dictionary word starts at {@code beginIndex}.
     *
     * <p>With {@link #minMatchTYpe} the walk stops at the first complete word; with any other
     * match type it continues and the longest complete word wins. The returned length is that
     * of the last <em>complete</em> word reached, so characters that merely follow a prefix of
     * a longer dictionary entry are not counted.
     *
     * @param txt        text to inspect; {@code null} yields 0
     * @param beginIndex index of the first character to test
     * @param matchType  {@link #minMatchTYpe} or {@link #maxMatchType}
     * @return length of the matched word, or 0 when nothing matches or the match is shorter
     *         than two characters
     */
    public int CheckSensitiveWord(String txt, int beginIndex, int matchType) {
        if (txt == null || sensitiveWordMap == null) {
            return 0;
        }
        int depth = 0;          // characters consumed along the current dictionary path
        int matchedLength = 0;  // depth at which the last complete word ended
        Map<?, ?> node = sensitiveWordMap;
        for (int i = beginIndex; i < txt.length(); i++) {
            Object child = node.get(Character.valueOf(txt.charAt(i)));
            if (!(child instanceof Map)) {
                break; // no dictionary entry continues with this character
            }
            node = (Map<?, ?>) child;
            depth++;
            if ("1".equals(node.get("isEnd"))) {
                matchedLength = depth;
                if (SensitivewordFilter.minMatchTYpe == matchType) {
                    break;
                }
            }
        }
        // Matches shorter than two characters are never reported.
        return matchedLength < 2 ? 0 : matchedLength;
    }

    /**
     * Demo: loads the dictionary, scans a sample text with match type 1 and prints the
     * words found together with the elapsed time.
     */
    public static void main(String[] args) {
        SensitivewordFilter filter = new SensitivewordFilter();
        System.out.println("敏感词的数量:" + filter.sensitiveWordMap.size());
        String string = "新华社乌鲁木齐9月12日电(记者曹志恒)新疆吐鲁番地区中级人民法院12日对鄯善县“6·26”暴力恐怖案件中艾合买提尼亚孜·斯迪克等4名被告人一审公开开庭审理并当庭宣判,以组织领导恐怖组织罪、故意杀人罪、放火罪数罪并罚,判处艾合买提尼亚孜·斯迪克死刑,剥夺政治权利终身;以参加恐怖组织罪、故意杀人罪、放火罪数罪并罚"
                + ",判处吾拉音·艾力死刑,剥夺政治权利终身;以参加恐怖组织罪、故意杀人罪数罪并罚"
                + ",判处阿不都拉·斯热甫力死刑,剥夺政治权利终身;以参加恐怖组织罪、故意杀人罪、放火罪数罪并罚,判处艾克拉木·吾斯曼有期徒刑25年,剥夺政治权利5年。\r\n" +
                "法庭审理查明,2010年4月以来,艾克拉木·吾斯曼、吾拉音·艾力、阿不都拉·斯热甫力先后与"
                + "艾力·艾合买提尼亚孜等数人在鄯善县鲁克沁镇多次聚集,从事非法宗教活动,宣扬宗教极端思想,收听观看境外恐怖组织煽动实施暴力恐怖活动的音视频,"
                + "传看宣扬宗教极端思想的书籍,接受宗教极端思想并就进行暴力恐怖活动达成共谋,进行暴恐体能训练,恐怖组织逐渐形成个人一杯红酒一部电影在夜三级片 深人静的晚上,关上电话静静的发呆着。";
        System.out.println("待检测语句字数:" + string.length());
        long beginTime = System.currentTimeMillis();
        Set<String> set = filter.getSensitiveWord(string, 1);
        long endTime = System.currentTimeMillis();
        System.out.println("语句中包含敏感词的个数为:" + set.size() + "。包含:" + set);
        System.out.println("总共消耗时间为:" + (endTime - beginTime));
    }
}
import java
.io
.BufferedReader
;
import java
.io
.File
;
import java
.io
.FileInputStream
;
import java
.io
.InputStreamReader
;
import java
.util
.HashMap
;
import java
.util
.HashSet
;
import java
.util
.Iterator
;
import java
.util
.Map
;
import java
.util
.Set
;
public class SensitiveWordInit {
private String ENCODING
= "UTF-8";
@SuppressWarnings("rawtypes")
public HashMap sensitiveWordMap
;
public SensitiveWordInit(){
super();
}
@SuppressWarnings("rawtypes")
public Map
initKeyWord(){
try {
Set
<String> keyWordSet
= readSensitiveWordFile();
addSensitiveWordToHashMap(keyWordSet
);
} catch (Exception e
) {
e
.printStackTrace();
}
return sensitiveWordMap
;
}
@SuppressWarnings({ "rawtypes", "unchecked" })
private void addSensitiveWordToHashMap(Set
<String> keyWordSet
) {
sensitiveWordMap
= new HashMap(keyWordSet
.size());
String key
= null
;
Map nowMap
= null
;
Map
<String, String> newWorMap
= null
;
Iterator
<String> iterator
= keyWordSet
.iterator();
while(iterator
.hasNext()){
key
= iterator
.next();
nowMap
= sensitiveWordMap
;
for(int i
= 0 ; i
< key
.length() ; i
++){
char keyChar
= key
.charAt(i
);
Object wordMap
= nowMap
.get(keyChar
);
if(wordMap
!= null
){
nowMap
= (Map
) wordMap
;
}
else{
newWorMap
= new HashMap<String,String>();
newWorMap
.put("isEnd", "0");
nowMap
.put(keyChar
, newWorMap
);
nowMap
= newWorMap
;
}
if(i
== key
.length() - 1){
nowMap
.put("isEnd", "1");
}
}
}
}
public static void main(String
[] args
) {
Set set
= new HashSet();
set
.add("中国");
set
.add("中国人民");
set
.add("中国人");
new SensitiveWordInit().addSensitiveWordToHashMap(set
);
}
@SuppressWarnings("resource")
private Set
<String> readSensitiveWordFile() throws Exception
{
Set
<String> set
= null
;
File filexx
=new File("");
String path
=filexx
.getAbsolutePath();
File file
= new File(path
+"\\src\\SensitiveWord.txt");
InputStreamReader read
= new InputStreamReader(new FileInputStream(file
),ENCODING
);
try {
if(file
.isFile() && file
.exists()){
set
= new HashSet<String>();
BufferedReader bufferedReader
= new BufferedReader(read
);
String txt
= null
;
while((txt
= bufferedReader
.readLine()) != null
){
set
.add(txt
);
}
}
else{
throw new Exception("敏感词库文件不存在");
}
} catch (Exception e
) {
throw e
;
}finally{
read
.close();
}
return set
;
}
}
// 转载请注明原文地址 (when reposting, please credit the original source): https://ipadbbs.8miu.com/read-45780.html