文章目录
- 背景介绍
- 导入相关库
- 数据探索
- 数据预处理
- 暗通道去雾算法
- 数据建模
- 预先定义模型评估方法
- 使用传统机器学习模型:支持向量机、随机森林、神经网络、集成学习Adaboost进行训练
- 使用CNN进行建模训练
- 模型性能评估,以及不同模型性能比较
- 数据集+源码
背景介绍
假设开发一款基于户外监控摄像头的山火/非法焚烧秸秆的预防系统,希望能够在最短的时间内基于监控画面确定是否有烟火发生,然后由人工快速介入,确认是否为山火/非法焚烧秸秆事件,最终交由当地的联防/公安/森林等部门进行快速响应。为此我们采集了海量的户外图像,大致分为两类:没有任何烟火的图像,以及有明显烟/火出现的图像。图像由经过培训的人员判断后直接从监控画面上截图获得。目标是基于所提供的图像,训练一个识别烟火的图像分类器。
导入相关库
import pandas
as pd
import numpy
as np
import os
import time
import cv2
from PIL
import Image
from PIL
import ImageEnhance
import itertools
import matplotlib
.pyplot
as plt
plt
.rcParams
['font.sans-serif'] = 'SimHei'
plt
.rcParams
['axes.unicode_minus'] = False
from sklearn
.ensemble
import RandomForestClassifier
from sklearn
.ensemble
import AdaBoostClassifier
from sklearn
.neural_network
import MLPClassifier
from sklearn
.svm
import SVC
from sklearn
.model_selection
import GridSearchCV
from sklearn
.metrics
import confusion_matrix
from sklearn
.metrics
import classification_report
from sklearn
.metrics
import accuracy_score
, mean_squared_error
, r2_score
, confusion_matrix
from sklearn
.metrics
import roc_curve
, auc
,recall_score
from sklearn
.preprocessing
import StandardScaler
from sklearn
.model_selection
import train_test_split
from sklearn
.metrics
import classification_report
,confusion_matrix
import warnings
warnings
.filterwarnings
('ignore')
数据探索
读取图片数据
# Dataset layout: "fogs/0/" holds images without smoke/fire, "fogs/1/" holds
# images with visible smoke/fire.
nofog_path, fog_path = "fogs/0/", "fogs/1/"

# File names (not full paths) found in each class directory; the smoke/fire
# directory is listed first, as before.
fog, nofog = (os.listdir(folder) for folder in (fog_path, nofog_path))
查看烟火图片
# Preview seven smoke/fire samples (listing entries 1..7) side by side,
# with axis ticks hidden.
plt.figure(figsize=(25, 25))
print("有明显烟/火出现的图片:")
for slot in range(1, 8):
    axis = plt.subplot(1, 7, slot)
    sample_path = fog_path + fog[slot]
    axis.set_xticks([])
    axis.set_yticks([])
    axis.imshow(Image.open(sample_path))
有明显烟/火出现的图片:
查看无烟火图片
# Preview seven samples without smoke/fire (listing entries 1..7) side by
# side, with axis ticks hidden.
plt.figure(figsize=(25, 25))
print("无明显烟/火出现的图片:")
for slot in range(1, 8):
    axis = plt.subplot(1, 7, slot)
    sample_path = nofog_path + nofog[slot]
    axis.set_xticks([])
    axis.set_yticks([])
    axis.imshow(Image.open(sample_path))
无明显烟/火出现的图片:
# Bar chart comparing how many files each class directory contains.
class_counts = {
    "烟火图片数量": len(fog),
    "无烟火图片数量": len(nofog),
}
plt.title("数据集正例/负例数目")
plt.bar(list(class_counts.keys()), list(class_counts.values()))
数据预处理
暗通道去雾算法
暗通道去雾算法参考链接
这个去雾算法只针对彩色图像,而且对于低对比度的天空或者水面背景,去雾后会产生块效应,去雾效果不好。**因此在调用去雾算法前,先提高图片的对比度**,再计算去雾后图片每个像素的三通道均值作为特征进行训练。
暗通道去雾算法实现
def zmMinFilterGray(src, r=7):
    """Minimum-filter ``src`` by eroding it with a square kernel of ones.

    Args:
        src: Image array handed to ``cv2.erode``.
        r: Filter radius parameter; the kernel side length is ``2 * r - 1``.

    Returns:
        The eroded image returned by ``cv2.erode``.
    """
    side = 2 * r - 1
    kernel = np.ones((side, side))
    return cv2.erode(src, kernel)
def guidedfilter(I, p, r, eps):
    """Guided filter (port of a MATLAB reference implementation).

    Args:
        I: Guidance image; must be 2-D (its shape is unpacked into two values).
        p: Image to be filtered, combined element-wise with ``I``.
        r: Side length of the ``(r, r)`` window passed to ``cv2.boxFilter``.
        eps: Regularisation term added to the local variance of ``I``.

    Returns:
        ``mean(a) * I + mean(b)``, where ``a`` and ``b`` are the per-window
        linear coefficients.
    """
    # Not used below, but the unpacking rejects guidance images that are not 2-D.
    height, width = I.shape

    def window_mean(img):
        return cv2.boxFilter(img, -1, (r, r))

    mean_I = window_mean(I)
    mean_p = window_mean(p)
    cov_Ip = window_mean(I * p) - mean_I * mean_p
    var_I = window_mean(I * I) - mean_I * mean_I

    # Local linear model p ~ a * I + b inside every window.
    a = cov_Ip / (var_I + eps)
    b = mean_p - a * mean_I

    return window_mean(a) * I + window_mean(b)
def getV1(m, r, eps, w, maxV1):
    """Compute the atmospheric veil V1 and the atmospheric light A (V1 = 1 - t/A).

    Args:
        m: Image array with channels on axis 2 (``read_data`` passes a BGR
            image scaled to [0, 1]).
        r: Window size forwarded to ``guidedfilter``.
        eps: Regularisation forwarded to ``guidedfilter``.
        w: Factor applied to the veil before it is capped.
        maxV1: Upper bound applied to the scaled veil.

    Returns:
        Tuple ``(V1, A)``: the capped veil image and the scalar light value.
    """
    # Dark channel: per-pixel minimum across channels, refined by the guided
    # filter using its own minimum-filtered version.
    veil = np.min(m, axis=2)
    veil = guidedfilter(veil, zmMinFilterGray(veil, 7), r, eps)

    bins = 2000
    counts, edges = np.histogram(veil, bins)
    cdf = np.cumsum(counts) / float(veil.size)

    # Walk down from the top bin to the first one whose cumulative share is
    # at most 99.9%; stop at bin 1 if none qualifies.
    lmax = bins - 1
    while lmax > 1 and not (cdf[lmax] <= 0.999):
        lmax -= 1

    # A is the largest channel-mean intensity among the pixels at or above
    # that bin's lower edge.
    brightness = np.mean(m, axis=2)
    A = brightness[veil >= edges[lmax]].max()

    veil = np.minimum(veil * w, maxV1)
    return veil, A
def deHaze(m, r=81, eps=0.001, w=0.95, maxV1=0.80, bGamma=False):
    """Remove haze from ``m`` using the veil and light estimated by ``getV1``.

    Args:
        m: Image array with at least three channels on axis 2 (``read_data``
            passes a BGR image scaled to [0, 1]).
        r, eps, w, maxV1: Forwarded unchanged to ``getV1``.
        bGamma: When true, apply a gamma correction whose exponent is
            ``log(0.5) / log(mean(Y))``.

    Returns:
        Array with the shape of ``m``, clipped to [0, 1].
    """
    Y = np.zeros(m.shape)
    V1, A = getV1(m, r, eps, w, maxV1)

    # The denominator is the same for every channel, so compute it once.
    transmission = 1 - V1 / A
    for channel in range(3):
        Y[:, :, channel] = (m[:, :, channel] - V1) / transmission

    Y = np.clip(Y, 0, 1)
    if not bGamma:
        return Y
    gamma = np.log(0.5) / np.log(Y.mean())
    return Y ** gamma
对数据集的所有图片进行处理
def read_data(file_path):
    """Load every image in a directory and turn it into a 1600-value feature.

    Each image is contrast-enhanced (factor 1.5), written to ``temp.jpg`` in
    the working directory, re-read with OpenCV, de-hazed with ``deHaze`` and
    rescaled to [0, 255]. The feature is the per-pixel mean of the three
    channel values (each truncated to an integer first), placed in the
    top-left corner of a 40x40 zero matrix and flattened.

    Args:
        file_path: Directory holding the positive or negative sample images.

    Returns:
        Tuple ``(features, newimg)``: the list of flattened feature vectors
        and the list of de-hazed images they were computed from.

    Notes:
        * Images taller or wider than 40 pixels are skipped; they do not fit
          the feature matrix. The previous bare ``except: pass`` dropped them
          implicitly through an ``IndexError``.
        * Images whose de-hazed result contains NaN are skipped as well; the
          old code dropped them because ``int(nan)`` raised inside that
          ``except``.
        * Smaller images are kept and zero-padded, as before.
    """
    side = 40
    contrast = 1.5
    features = []
    newimg = []

    for picture in os.listdir(file_path):
        # The context manager closes the file handle; enhance() returns a new
        # in-memory image, so it remains usable after the source is closed.
        with Image.open(os.path.join(file_path, picture)) as img:
            img_contrasted = ImageEnhance.Contrast(img).enhance(contrast)

        # The JPEG round trip is kept on purpose: it is how the image reaches
        # OpenCV and it affects the pixel values used for the features.
        img_contrasted.save("temp.jpg")
        m = deHaze(cv2.imread("temp.jpg") / 255.0) * 255

        rows, cols = m.shape[:2]
        if rows > side or cols > side:
            continue
        channels = m[:, :, :3]
        if np.isnan(channels).any():
            continue

        # Vectorised form of (int(b) + int(g) + int(r)) / 3 for every pixel.
        feature_matrix = np.zeros((side, side))
        feature_matrix[:rows, :cols] = channels.astype(np.int64).sum(axis=2) / 3

        features.append(feature_matrix.reshape(side * side))
        newimg.append(m)

    return features, newimg
# Build the labelled feature table: smoke/fire rows (label 1) first, then the
# rows without smoke/fire (label 0).
features, fog_contrasted = read_data(fog_path)
fogs = pd.DataFrame(features)
fogs['label'] = [1] * len(fogs)

features, nofog_contrasted = read_data(nofog_path)
no_fogs = pd.DataFrame(features)
no_fogs['label'] = [0] * len(no_fogs)

df = pd.concat([fogs, no_fogs], axis=0)

# Collect the de-hazed images (negatives first) and force each to 40x40x3.
# NOTE(review): np.resize repeats or truncates the flattened data; it does not
# interpolate. `imgs` is not referenced again in the visible code.
imgs = [
    picture if picture.shape == (40, 40, 3) else np.resize(picture, (40, 40, 3))
    for picture in (*nofog_contrasted, *fog_contrasted)
]

print("数据集大小:", df.shape)
df.head()
数据集大小: (3609, 1601)
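|   | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 1591 | 1592 | 1593 | 1594 | 1595 | 1596 | 1597 | 1598 | 1599 | label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 149.666667 | 148.666667 | 145.666667 | 144.666667 | 144.666667 | 145.666667 | 146.666667 | 148.333333 | 140.333333 | 140.333333 | ... | 30.666667 | 31.000000 | 43.000000 | 27.000000 | 25.666667 | 36.000000 | 27.333333 | 22.333333 | 26.666667 | 1 |
| 1 | 106.333333 | 110.333333 | 114.333333 | 113.333333 | 108.333333 | 105.000000 | 108.000000 | 114.000000 | 110.333333 | 119.000000 | ... | 70.333333 | 69.666667 | 85.000000 | 80.666667 | 79.000000 | 77.666667 | 70.000000 | 74.666667 | 73.000000 | 1 |
| 2 | 144.000000 | 140.333333 | 133.333333 | 126.000000 | 123.000000 | 119.333333 | 116.000000 | 116.000000 | 109.000000 | 106.000000 | ... | 34.000000 | 38.333333 | 41.000000 | 44.000000 | 44.000000 | 41.000000 | 33.000000 | 25.000000 | 20.000000 | 1 |
| 3 | 87.333333 | 18.666667 | 40.000000 | 52.000000 | 54.000000 | 48.000000 | 85.000000 | 126.000000 | 114.000000 | 85.333333 | ... | 115.666667 | 115.666667 | 114.666667 | 112.000000 | 111.000000 | 110.000000 | 111.000000 | 113.000000 | 114.666667 | 1 |
| 4 | 61.000000 | 86.000000 | 37.000000 | 39.333333 | 48.333333 | 40.333333 | 85.333333 | 93.666667 | 104.333333 | 103.333333 | ... | 74.666667 | 78.666667 | 75.333333 | 84.666667 | 69.333333 | 52.666667 | 43.333333 | 33.000000 | 39.333333 | 1 |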
5 rows × 1601 columns
预处理后的图片效果
# Show seven preprocessed smoke/fire images (entries 1..7), rescaled from
# [0, 255] to [0, 1] for display.
plt.figure(figsize=(25, 25))
print("有明显烟/火出现的图片:")
for slot in range(1, 8):
    axis = plt.subplot(1, 7, slot)
    axis.set_xticks([])
    axis.set_yticks([])
    preview = fog_contrasted[slot] / 255
    axis.imshow(preview)
有明显烟/火出现的图片:
# Show seven preprocessed images without smoke/fire (entries 1..7), rescaled
# from [0, 255] to [0, 1] for display.
plt.figure(figsize=(25, 25))
print("无明显烟/火出现的图片:")
for slot in range(1, 8):
    axis = plt.subplot(1, 7, slot)
    axis.set_xticks([])
    axis.set_yticks([])
    preview = nofog_contrasted[slot] / 255
    axis.imshow(preview)
无明显烟/火出现的图片:
数据建模
预先定义模型评估方法
- 绘制混淆矩阵
- 模型性能评估(准确率、AUC、漏报率、误报率;召回率由分类评估报告给出)
- 绘制ROC曲线
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map and show it.

    Args:
        cm: Square confusion matrix (rows are drawn on the "True label" axis,
            columns on the "Predicted label" axis).
        classes: Tick labels, one per row/column of ``cm``.
        normalize: When true, each row is divided by its total so cells show
            the fraction of that true class, printed with two decimals.
            Previously this flag only changed the number format and the raw
            counts were printed as floats.
        title: Figure title.
        cmap: Matplotlib colour map for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without any sample stay at zero instead of dividing by zero.
        cm = cm.astype(float) / np.where(row_totals == 0, 1, row_totals)

    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
def model_performance_evaluation(model_name, test, pred, spend_time):
    """Print and return accuracy, AUC, miss rate, false-alarm rate and time.

    Args:
        model_name: Label printed in front of every metric line.
        test: True binary labels (0 = no smoke/fire, 1 = smoke/fire).
        pred: Predicted labels as a NumPy array (cast to float64 here).
        spend_time: Training time in seconds; printed and returned unchanged.

    Returns:
        Tuple ``(acc, roc_auc, miss_report, false_report, spend_time)``.

    Notes:
        ``confusion_matrix`` puts true labels on rows and predictions on
        columns, so with ``labels=[0, 1]`` the flattened order is
        TN, FP, FN, TP. The miss rate is FN / (FN + TP) and the false-alarm
        rate is FP / (FP + TN). The previous code computed FP / (FP + TP) and
        FN / (TN + FN), i.e. one minus the precision of each class.
        The AUC is computed from hard label predictions, not scores.
    """
    acc = accuracy_score(test, pred)
    print(model_name, '| 准确率: %.4f' % acc)

    pred = pred.astype('float64')
    false_positive_rate, true_positive_rate, thresholds = roc_curve(test, pred)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    print(model_name, '| AUC: %.4f' % roc_auc)

    # Fixing the label set keeps the matrix 2x2 even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(test, pred, labels=[0, 1]).ravel()
    positives = fn + tp
    negatives = fp + tn
    miss_report = fn / positives if positives else 0.0
    false_report = fp / negatives if negatives else 0.0

    print(model_name, "| 漏报率为:%.4f" % miss_report)
    print(model_name, "| 误报率为:%.4f" % false_report)
    print(model_name, "| 训练时长(秒):%.4f" % spend_time)
    return acc, roc_auc, miss_report, false_report, spend_time
def plot_ROC_curve(y_test, y_predict):
    """Plot the ROC curve of ``y_predict`` against ``y_test`` and show it.

    Args:
        y_test: True labels, passed as the first argument of ``roc_curve``.
        y_predict: Predictions or scores, passed as the second argument.
    """
    fpr, tpr, _ = roc_curve(y_test, y_predict)
    area = auc(fpr, tpr)
    legend_text = 'AUC = %0.2f' % area

    plt.title('ROC')
    plt.plot(fpr, tpr, 'b', label=legend_text)
    plt.legend(loc='lower right')
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('TPR')
    plt.xlabel('FPR')
    plt.show()
使用传统机器学习模型:支持向量机、随机森林、神经网络、集成学习Adaboost进行训练
def train_model(label, data, model_name):
    """Train one classical model on a 90/10 split and report its performance.

    Args:
        label: Target labels aligned with the rows of ``data``.
        data: Feature matrix.
        model_name: One of '随机森林', '支持向量机', '神经网络', 'adaboost'.

    Returns:
        List ``[acc, roc_auc, miss_report, false_report, spend_time]`` for
        the held-out test split, as produced by
        ``model_performance_evaluation``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
            Previously this surfaced as an unbound-variable error after the
            data had already been split.
    """
    # Factories so that only the requested estimator is constructed.
    builders = {
        '随机森林': lambda: RandomForestClassifier(
            n_estimators=100, min_samples_leaf=10, random_state=0),
        '支持向量机': lambda: SVC(kernel='rbf', C=10, random_state=0),
        '神经网络': lambda: MLPClassifier(random_state=0),
        'adaboost': lambda: AdaBoostClassifier(
            n_estimators=100, random_state=0),
    }
    if model_name not in builders:
        raise ValueError(
            "Unknown model_name %r; expected one of %s"
            % (model_name, list(builders)))

    X_train, X_test, y_train, y_test = train_test_split(
        data, label, test_size=0.1, random_state=0)

    clf = builders[model_name]()
    start = time.time()
    model = clf.fit(X_train, y_train)
    spend_time = time.time() - start  # fit time only, prediction excluded

    y_pred = model.predict(X_test)
    print(model_name+"分类评估报告")
    cnf_matrix = confusion_matrix(y_test, y_pred)
    np.set_printoptions(precision=2)
    class_names = [0, 1]
    print(classification_report(y_test, y_pred))
    plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix')
    plot_ROC_curve(y_test, y_pred)

    print(model_name+"在训练集上的性能 -- ")
    model_performance_evaluation(model_name, y_train, clf.predict(X_train), spend_time)
    print("=========================================")
    print(model_name+"在测试集上的性能 -- ")
    return list(model_performance_evaluation(model_name, y_test, y_pred, spend_time))
from sklearn
.preprocessing
import StandardScaler
# Every column except the trailing 'label' column is a feature.
feature_columns = df.columns[:-1]
label = df['label']

# NOTE(review): the scaler is fitted on the whole dataset, i.e. before
# train_model splits off its test set.
scaler = StandardScaler()
data = scaler.fit_transform(df[feature_columns])

RF_result = train_model(label=label, data=data, model_name="随机森林")
随机森林分类评估报告
precision recall f1-score support
0 0.83 0.87 0.85 197
1 0.84 0.79 0.81 164
avg / total 0.83 0.83 0.83 361
随机森林在训练集上的性能 --
随机森林 | 准确率: 0.9406
随机森林 | AUC: 0.9386
随机森林 | 漏报率为:0.0531
随机森林 | 误报率为:0.0643
随机森林 | 训练时长(秒):7.1245
=========================================
随机森林在测试集上的性能 --
随机森林 | 准确率: 0.8338
随机森林 | AUC: 0.8298
随机森林 | 漏报率为:0.1623
随机森林 | 误报率为:0.1691
随机森林 | 训练时长(秒):7.1245
# Support vector machine on the same scaled features and labels.
SVM_result = train_model(label=label, data=data, model_name="支持向量机")
支持向量机分类评估报告
precision recall f1-score support
0 0.90 0.82 0.86 197
1 0.81 0.89 0.85 164
avg / total 0.86 0.85 0.85 361
支持向量机在训练集上的性能 --
支持向量机 | 准确率: 0.9717
支持向量机 | AUC: 0.9731
支持向量机 | 漏报率为:0.0483
支持向量机 | 误报率为:0.0109
支持向量机 | 训练时长(秒):12.2925
=========================================
支持向量机在测试集上的性能 --
支持向量机 | 准确率: 0.8532
支持向量机 | AUC: 0.8563
支持向量机 | 漏报率为:0.1934
支持向量机 | 误报率为:0.1000
支持向量机 | 训练时长(秒):12.2925
# Multi-layer perceptron on the same scaled features and labels.
MLP_result = train_model(label=label, data=data, model_name="神经网络")
神经网络分类评估报告
precision recall f1-score support
0 0.81 0.65 0.72 197
1 0.66 0.82 0.73 164
avg / total 0.74 0.73 0.73 361
神经网络在训练集上的性能 --
神经网络 | 准确率: 0.9704
神经网络 | AUC: 0.9710
神经网络 | 漏报率为:0.0418
神经网络 | 误报率为:0.0193
神经网络 | 训练时长(秒):10.2106
=========================================
神经网络在测试集上的性能 --
神经网络 | 准确率: 0.7285
神经网络 | AUC: 0.7359
神经网络 | 漏报率为:0.3366
神经网络 | 误报率为:0.1887
神经网络 | 训练时长(秒):10.2106
# AdaBoost on the same scaled features and labels.
ADA_result = train_model(label=label, data=data, model_name="adaboost")
adaboost分类评估报告
precision recall f1-score support
0 0.77 0.64 0.70 197
1 0.64 0.77 0.70 164
avg / total 0.71 0.70 0.70 361
adaboost在训练集上的性能 --
adaboost | 准确率: 0.8150
adaboost | AUC: 0.8167
adaboost | 漏报率为:0.2282
adaboost | 误报率为:0.1445
adaboost | 训练时长(秒):37.8667
=========================================
adaboost在测试集上的性能 --
adaboost | 准确率: 0.7008
adaboost | AUC: 0.7065
adaboost | 漏报率为:0.3571
adaboost | 误报率为:0.2303
adaboost | 训练时长(秒):37.8667
使用CNN进行建模训练
import keras
from keras
.preprocessing
.image
import img_to_array
from keras
.utils
import to_categorical
from sklearn
.model_selection
import train_test_split
import numpy
as np
import random
from keras
.optimizers
import Adam
from keras
.preprocessing
.image
import ImageDataGenerator
from keras
.layers
import Dense
,Conv2D
,MaxPooling2D
,Flatten
,BatchNormalization
,Dropout
from keras
.optimizers
import SGD
from keras
.models
import Sequential
from keras
.optimizers
import RMSprop
import keras
.backend
as K
def cnn(channel, height, width, classes):
    """Build the (uncompiled) convolutional classifier.

    Architecture: two 5x5 conv layers (32 filters), max-pool, two 3x3 conv
    layers (64 filters), max-pool, flatten, a 256-unit dense layer and a
    softmax output with ``classes`` units.

    Args:
        channel: Number of input channels.
        height: Input height in pixels.
        width: Input width in pixels.
        classes: Number of output classes.

    Returns:
        A ``Sequential`` model.
    """
    # The input shape follows the backend's image data format.
    if K.image_data_format() == "channels_last":
        input_shape = (height, width, channel)
    else:
        input_shape = (channel, height, width)

    return Sequential([
        Conv2D(32, (5, 5), padding="same", activation="relu",
               input_shape=input_shape, name="conv1"),
        Conv2D(32, (5, 5), padding="same", activation="relu", name="conv2"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="pool1"),
        Conv2D(64, (3, 3), padding="same", activation="relu", name="conv3"),
        Conv2D(64, (3, 3), padding="same", activation="relu", name="conv4"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="pool2"),
        Flatten(),
        Dense(256, activation="relu", name="fc1"),
        Dense(classes, activation="softmax", name="fc2"),
    ])
def train(aug, model, train_x, train_y, test_x, test_y):
    """Compile and fit ``model`` on augmented batches, then plot the history.

    Reads the module-level ``batch_size`` and ``epochs`` settings.

    Args:
        aug: Data generator whose ``flow`` yields augmented training batches.
        model: Model to compile and train in place.
        train_x, train_y: Training images and one-hot labels.
        test_x, test_y: Validation images and one-hot labels.

    Returns:
        Elapsed wall-clock seconds covering compilation and fitting.
    """
    start = time.time()
    model.compile(loss="categorical_crossentropy", optimizer="Adam",
                  metrics=["accuracy"])
    batches = aug.flow(train_x, train_y, batch_size=batch_size)
    _history = model.fit_generator(
        batches,
        validation_data=(test_x, test_y),
        steps_per_epoch=len(train_x) // batch_size,
        epochs=epochs,
        verbose=1,
    )
    spend_time = time.time() - start

    # One line per recorded series: (history key, legend label).
    curves = (
        ("loss", "train_loss"),
        ("val_loss", "val_loss"),
        ("accuracy", "train_acc"),
        ("val_accuracy", "val_acc"),
    )
    plt.figure()
    epoch_axis = np.arange(0, epochs)
    for key, legend in curves:
        plt.plot(epoch_axis, _history.history[key], label=legend)
    plt.title("loss and accuracy")
    plt.xlabel("epoch")
    plt.ylabel("acc/loss")
    plt.legend(loc="best")
    plt.show()
    return spend_time
# Input geometry and training settings; batch_size and epochs are read as
# globals by train().
channel, height, width = 1, 40, 40
class_num = 2
norm_size = 32
batch_size = 32
epochs = 30

# Reshape the scaled feature rows into 40x40 single-channel images and
# one-hot encode the labels.
data = data.reshape(-1, 40, 40, 1)
label = to_categorical(np.array(df['label'].astype('int')))

train_x, test_x, train_y, test_y = train_test_split(
    data, label, test_size=0.1, random_state=0)

model = cnn(channel=channel, height=height, width=width, classes=class_num)

augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
aug = ImageDataGenerator(**augmentation_options)

spend_time = train(aug, model, train_x, train_y, test_x, test_y)
spend_time
Epoch 1/30
101/101 [==============================] - 21s 208ms/step - loss: 0.4680 - accuracy: 0.7752 - val_loss: 0.3892 - val_accuracy: 0.8338
Epoch 2/30
101/101 [==============================] - 20s 203ms/step - loss: 0.3881 - accuracy: 0.8346 - val_loss: 0.2984 - val_accuracy: 0.8670
Epoch 3/30
101/101 [==============================] - 22s 213ms/step - loss: 0.3372 - accuracy: 0.8595 - val_loss: 0.3227 - val_accuracy: 0.8587
Epoch 4/30
101/101 [==============================] - 21s 204ms/step - loss: 0.2974 - accuracy: 0.8750 - val_loss: 0.2847 - val_accuracy: 0.8947
Epoch 5/30
101/101 [==============================] - 20s 202ms/step - loss: 0.2841 - accuracy: 0.8859 - val_loss: 0.2433 - val_accuracy: 0.8920
Epoch 6/30
101/101 [==============================] - 21s 206ms/step - loss: 0.2757 - accuracy: 0.8912 - val_loss: 0.3759 - val_accuracy: 0.8338
Epoch 7/30
101/101 [==============================] - 20s 202ms/step - loss: 0.2710 - accuracy: 0.8930 - val_loss: 0.2390 - val_accuracy: 0.8947
Epoch 8/30
101/101 [==============================] - 20s 200ms/step - loss: 0.2439 - accuracy: 0.9033 - val_loss: 0.2108 - val_accuracy: 0.9252
Epoch 9/30
101/101 [==============================] - 20s 201ms/step - loss: 0.2403 - accuracy: 0.9036 - val_loss: 0.2434 - val_accuracy: 0.9058
Epoch 10/30
101/101 [==============================] - 20s 201ms/step - loss: 0.2158 - accuracy: 0.9148 - val_loss: 0.2264 - val_accuracy: 0.9141
Epoch 11/30
101/101 [==============================] - 21s 206ms/step - loss: 0.2254 - accuracy: 0.9114 - val_loss: 0.1955 - val_accuracy: 0.9335
Epoch 12/30
101/101 [==============================] - 21s 210ms/step - loss: 0.2159 - accuracy: 0.9132 - val_loss: 0.2420 - val_accuracy: 0.9141
Epoch 13/30
101/101 [==============================] - 21s 203ms/step - loss: 0.2087 - accuracy: 0.9186 - val_loss: 0.1581 - val_accuracy: 0.9418
Epoch 14/30
101/101 [==============================] - 20s 202ms/step - loss: 0.1937 - accuracy: 0.9244 - val_loss: 0.2355 - val_accuracy: 0.9197
Epoch 15/30
101/101 [==============================] - 21s 204ms/step - loss: 0.2072 - accuracy: 0.9173 - val_loss: 0.1640 - val_accuracy: 0.9307
Epoch 16/30
101/101 [==============================] - 20s 201ms/step - loss: 0.1812 - accuracy: 0.9294 - val_loss: 0.1735 - val_accuracy: 0.9474
Epoch 17/30
101/101 [==============================] - 20s 200ms/step - loss: 0.1955 - accuracy: 0.9260 - val_loss: 0.1721 - val_accuracy: 0.9418
Epoch 18/30
101/101 [==============================] - 20s 201ms/step - loss: 0.1676 - accuracy: 0.9394 - val_loss: 0.1566 - val_accuracy: 0.9391
Epoch 19/30
101/101 [==============================] - 20s 201ms/step - loss: 0.1917 - accuracy: 0.9251 - val_loss: 0.2287 - val_accuracy: 0.9224
Epoch 20/30
101/101 [==============================] - 20s 200ms/step - loss: 0.1861 - accuracy: 0.9281 - val_loss: 0.1960 - val_accuracy: 0.9391
Epoch 21/30
101/101 [==============================] - 20s 201ms/step - loss: 0.1668 - accuracy: 0.9344 - val_loss: 0.1823 - val_accuracy: 0.9307
Epoch 22/30
101/101 [==============================] - 21s 205ms/step - loss: 0.1762 - accuracy: 0.9384 - val_loss: 0.1529 - val_accuracy: 0.9474
Epoch 23/30
101/101 [==============================] - 21s 206ms/step - loss: 0.1710 - accuracy: 0.9316 - val_loss: 0.1634 - val_accuracy: 0.9335
Epoch 24/30
101/101 [==============================] - 21s 205ms/step - loss: 0.1666 - accuracy: 0.9369 - val_loss: 0.1662 - val_accuracy: 0.9335
Epoch 25/30
101/101 [==============================] - 21s 207ms/step - loss: 0.1610 - accuracy: 0.9353 - val_loss: 0.1405 - val_accuracy: 0.9557
Epoch 26/30
101/101 [==============================] - 21s 205ms/step - loss: 0.1559 - accuracy: 0.9378 - val_loss: 0.1432 - val_accuracy: 0.9501
Epoch 27/30
101/101 [==============================] - 21s 205ms/step - loss: 0.1716 - accuracy: 0.9363 - val_loss: 0.1572 - val_accuracy: 0.9501
Epoch 28/30
101/101 [==============================] - 21s 205ms/step - loss: 0.1533 - accuracy: 0.9428 - val_loss: 0.1819 - val_accuracy: 0.9446
Epoch 29/30
101/101 [==============================] - 21s 206ms/step - loss: 0.1477 - accuracy: 0.9447 - val_loss: 0.1456 - val_accuracy: 0.9501
Epoch 30/30
101/101 [==============================] - 21s 205ms/step - loss: 0.1667 - accuracy: 0.9359 - val_loss: 0.1446 - val_accuracy: 0.9474
618.6195929050446
# Turn the softmax outputs and the one-hot targets back into class indices.
pred = model.predict(test_x, batch_size=32).argmax(axis=1)
testY = test_y.argmax(axis=1)

test_acc = accuracy_score(testY, pred)
cnf_matrix = confusion_matrix(testY, pred)
test_report = classification_report(testY, pred)
print("-----------------")
print("cnn测试集分类报告:\n", test_report)

CNN_result = model_performance_evaluation("cnn", testY, pred, spend_time)
plot_confusion_matrix(cnf_matrix, classes=[0, 1], title='Confusion matrix')
# plot_ROC_curve expects (true labels, predictions); the arguments used to be
# passed in the opposite order, which plotted the wrong curve.
plot_ROC_curve(testY, pred)
-----------------
cnn测试集分类报告:
precision recall f1-score support
0 0.94 0.96 0.95 197
1 0.96 0.93 0.94 164
avg / total 0.95 0.95 0.95 361
cnn | 准确率: 0.9474
cnn | AUC: 0.9456
cnn | 漏报率为:0.0440
cnn | 误报率为:0.0594
cnn | 训练时长(秒):618.6196
模型性能评估,以及不同模型性能比较
# One row per model, one column per metric; the column index is named "Name".
metric_names = pd.Index(["准确率", "AUC", "漏报率", "误报率", "训练时长"], name="Name")
scores_by_model = {
    "随机森林": RF_result,
    "支持向量机": SVM_result,
    "神经网络": MLP_result,
    "Adaboost": ADA_result,
    "CNN": CNN_result,
}
# Built metric-by-model first, then transposed so that models become rows.
result = pd.DataFrame(scores_by_model, index=metric_names).T
result
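| Name | 准确率 | AUC | 漏报率 | 误报率 | 训练时长 |
|---|---|---|---|---|---|
| 随机森林 | 0.833795 | 0.829841 | 0.162338 | 0.169082 | 7.124548 |
| 支持向量机 | 0.853186 | 0.856289 | 0.193370 | 0.100000 | 12.292464 |
| 神经网络 | 0.728532 | 0.735948 | 0.336634 | 0.188679 | 10.210611 |
| Adaboost | 0.700831 | 0.706481 | 0.357143 | 0.230303 | 37.866674 |
| CNN | 0.947368 | 0.945648 | 0.044025 | 0.059406 | 618.619593 |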
# Line plot of every metric except the last column (training time), with one
# x tick per model.
score_table = result.iloc[:, :-1]
tick_positions = np.arange(5)

score_table.plot()
plt.title("各个模型性能比较")
plt.xticks(tick_positions, result.index)
plt.ylim([0, 1])
plt.show()
# Training time per model, in seconds, with one x tick per model.
training_seconds = result['训练时长']
tick_positions = np.arange(5)

training_seconds.plot(title="各模型训练时长")
plt.xticks(tick_positions, result.index)
plt.ylabel("秒")
plt.show()
对于误报率与漏报率,相对而言,漏报率更重要。误报率偏高时,会增加人工的筛查操作、降低效率,但我们可以通过优化算法逐步降低这个结果;而当漏报率偏高时,真实的烟火事件可能被遗漏,会对模型最后的结果影响较大,很可能使得系统在实际应用中不能有效地发挥作用。由上可见,使用CNN的效果最好,测试集准确率约为0.947,AUC约为0.946,相比传统机器学习模型,准确率有显著的提升。
数据集+源码
关注以下公众号并回复“0008”,即可获取 GitHub 下载地址。