【深度学习-语音分类】婴儿啼哭声识别挑战赛Baseline
目录:比赛简介 / Baseline(1. 加载并保存数据;2. 设置训练数据;3. 搭建LSTM模型)/
最终结果 / 有需求的大佬欢迎加入我的接单群,需求详情请群里戳群主 / 获取数据集
比赛简介:
比赛地址:http://challenge.xfyun.cn/topic/info?type=baby-crying
Baseline:
1. 加载并保存数据:
import os
import pickle as pkl
import wave

import numpy as np
from tqdm import tqdm
# Names of the cry categories.  A label's position in this list is the
# integer class id used when the one-hot targets are built.
LABELS = ['awake', 'diaper', 'hug', 'hungry', 'sleepy', 'uncomfortable']
# Number of target classes, i.e. the length of every one-hot label vector.
N_CLASS = len(LABELS)
# Root of the training set; it is joined with each label name to locate the
# recordings of that class.
DATA_DIR = './train'
# Filled by the directory scan with (file_path, class_index) pairs.
file_glob = []
def get_wave_norm(file):
    """Read a WAV file and return its samples together with the sample rate.

    Args:
        file: Path (or file-like object) accepted by ``wave.open``.

    Returns:
        A ``(data, framerate)`` tuple.  ``data`` is a 1-D ``np.int16`` array
        built from every frame in the file; ``framerate`` is the sample rate
        read from the WAV header.

    Raises:
        ValueError: If the byte count of the audio data is not a multiple of
            two (for example 8-bit audio with an odd number of frames).

    Note:
        The bytes are always interpreted as 16-bit integers and channels are
        not separated, so the result is only meaningful for 16-bit mono
        recordings.  Despite the function name, no amplitude normalisation
        is applied.
    """
    with wave.open(file, 'rb') as f:
        framerate = f.getframerate()
        raw_bytes = f.readframes(f.getnframes())
    # np.fromstring is deprecated for binary input and rejected by recent
    # NumPy releases; np.frombuffer is the documented replacement.  The
    # copy() keeps the array writable, as the fromstring result used to be.
    data = np.frombuffer(raw_bytes, dtype=np.int16).copy()
    return data, framerate
# Walk DATA_DIR/<label>/ for every label and record one
# (file_path, class_index) pair per recording in file_glob.
count = 0
# Wrapping LABELS (not the enumerate object) lets tqdm know the total.
for i, cls_fold in enumerate(tqdm(LABELS)):
    cls_base = os.path.join(DATA_DIR, cls_fold)
    # os.listdir returns entries in arbitrary order, so sort them to make the
    # dataset order (and therefore the later seeded shuffle) reproducible.
    # Non-WAV entries are skipped because wave.open would fail on them.
    files = sorted(
        pt for pt in os.listdir(cls_base) if pt.lower().endswith('.wav')
    )
    print('{} train num:'.format(cls_fold), len(files))
    # `i` is the position of cls_fold in LABELS, i.e. its class index.
    file_glob.extend((os.path.join(cls_base, pt), i) for pt in files)
    count += len(files)
print('done.')
# Cut every recording into fixed-length windows and store them, paired with
# a one-hot label, as a list of (samples, label) tuples in ./data.pkl.
seg = 32000  # number of samples per training window
count = 0    # number of windows written to `data`
data = []
for file, lbl in tqdm(file_glob):
    raw, sr = get_wave_norm(file)
    length = raw.shape[0]
    # One-hot target shared by every window cut from this recording.
    one_hot = np.zeros(N_CLASS)
    one_hot[lbl] = 1
    # Step through the recording in non-overlapping windows of `seg` samples.
    # The range ends before any window that would run past the end of the
    # recording, so only full-length windows are kept; recordings shorter
    # than `seg` contribute nothing.  (The previous `end - start == seg`
    # test was always true, so truncated and empty slices were stored too.)
    for start in range(0, length - seg + 1, seg):
        # copy() gives each window its own label array.
        data.append((raw[start:start + seg], one_hot.copy()))
        count += 1
with open('./data.pkl', 'wb') as f:
    pkl.dump(data, f)
2. 设置训练数据:
from keras.models import Model
from keras.layers import (
    Input, Dense, Dropout, BatchNormalization, Conv2D, MaxPooling2D,
    AveragePooling2D, concatenate, Activation, ZeroPadding2D, LSTM,
)
from keras.layers import add, Flatten, Reshape
from keras.utils import plot_model
from keras.metrics import top_k_categorical_accuracy
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
import os
import wave
import numpy as np
import pickle as pkl

# Class names; a label's position in the list is its class index.
LABELS = ['awake', 'diaper', 'hug', 'hungry', 'sleepy', 'uncomfortable']
N_CLASS = len(LABELS)

# NOTE: unpickling can execute arbitrary code; only load a data.pkl that was
# produced locally by the data-preparation step.
with open('./data.pkl', 'rb') as f:
    raw_data = pkl.load(f)

# raw_data is iterated as (samples, label) pairs; split it into two arrays.
train_x = np.array([x for x, _ in raw_data])
train_y = np.array([y for _, y in raw_data])

# Shuffle features and labels with ONE shared permutation.  Re-seeding the
# global RNG and shuffling each list separately only keeps the pairs aligned
# by coincidence of both calls drawing the same random sequence.
np.random.seed(5)
order = np.random.permutation(len(train_x))
train_x = train_x[order]
train_y = train_y[order]
3. 搭建LSTM模型:
import keras.backend as K
from keras import regularizers
from keras import layers
from keras.models import Sequential
import keras
import os
import wave
import numpy as np
import pickle as pkl

# Class names; a label's position in the list is its class index.
LABELS = ['awake', 'diaper', 'hug', 'hungry', 'sleepy', 'uncomfortable']
N_CLASS = len(LABELS)

# NOTE: unpickling can execute arbitrary code; only load a data.pkl that was
# produced locally by the data-preparation step.
with open('./data.pkl', 'rb') as f:
    raw_data = pkl.load(f)

# data.pkl is written as a list of (samples, one_hot_label) tuples, so it
# cannot be indexed with 'x' / 'y' keys; unpack the pairs instead.  The
# trailing axis added to train_x is the single feature per time step that
# the LSTM input_shape below expects.
train_x = np.expand_dims(np.array([x for x, _ in raw_data]), axis=-1)
train_y = np.array([y for _, y in raw_data])

# Shuffle features and labels with one shared permutation so every window
# keeps its own label.  This matters because validation_split below takes
# the last 10% of the arrays as the validation set.
np.random.seed(5)
order = np.random.permutation(len(train_x))
train_x = train_x[order]
train_y = train_y[order]

# Window length (time steps per example), taken from the loaded data so this
# script does not depend on a `seg` variable defined in another script.
seg = train_x.shape[1]

# Three stacked LSTM layers followed by a softmax over the N_CLASS labels.
# NOTE(review): the samples are fed in as loaded, without rescaling --
# confirm that this is intended.
model = Sequential()
model.add(layers.LSTM(128, input_shape=(seg, 1), dropout=0.5,
                      return_sequences=True))
model.add(layers.LSTM(128, dropout=0.5, return_sequences=True))
model.add(layers.LSTM(64))
model.add(layers.Dense(N_CLASS, activation="softmax"))
model.summary()

# `Adam` is the class name available in every Keras release; the lowercase
# `adam` alias no longer exists in current versions.  The positional
# argument is the learning rate.
adam = keras.optimizers.Adam(2e-4)
model.compile(loss='categorical_crossentropy',
              optimizer=adam, metrics=['accuracy'])

batch_size = 64
model.fit(x=train_x, y=train_y, batch_size=batch_size,
          epochs=200, validation_split=0.1, shuffle=True)
model.save('my_model.h5')
最终结果:
有需求的大佬欢迎加入我的接单群,需求详情请群里戳群主
获取数据集:
关注我的公众号——可达鸭的深度学习教程,在公众号内回复“婴儿啼哭”即可获取数据集。