【深度学习-语音分类】婴儿啼哭声识别挑战赛Baseline

    技术2022-07-10  115

    【深度学习-语音分类】婴儿啼哭声识别挑战赛Baseline

    目录:比赛简介 · Baseline(1. 加载并保存数据;2. 设置训练数据;3. 搭建LSTM模型)· 最终结果 · 有需求的大佬欢迎加入我的接单群,需求详情请群里戳群主 · 获取数据集

    比赛简介:

    比赛地址:http://challenge.xfyun.cn/topic/info?type=baby-crying

    Baseline:

    1. 加载并保存数据:

    """Step 1: cut every training recording into fixed-length segments.

    Walks ``DATA_DIR/<label>/`` for each label in ``LABELS``, reads each WAV
    file, splits it into non-overlapping windows of ``seg`` samples and pickles
    the resulting ``(segment, one_hot_label)`` pairs to ``./data.pkl``.
    """
    import os
    import pickle as pkl
    import wave

    import numpy as np

    try:
        from tqdm import tqdm
    except ImportError:
        # The progress bar is cosmetic; fall back to plain iteration.
        def tqdm(iterable, *args, **kwargs):
            return iterable

    LABELS = ['awake', 'diaper', 'hug', 'hungry', 'sleepy', 'uncomfortable']
    N_CLASS = len(LABELS)
    DATA_DIR = './train'
    seg = 32000  # number of samples per training segment


    def get_wave_norm(file):
        """Read a WAV file and return ``(samples, framerate)``.

        The raw frame bytes are interpreted as native-endian 16-bit integers,
        exactly as they are stored in the file; no channel de-interleaving or
        amplitude scaling is performed.
        NOTE(review): this presumably expects mono 16-bit PCM input - confirm
        against the dataset.

        Args:
            file: Path of the WAV file to read.

        Returns:
            A tuple of a 1-D ``np.int16`` array and the file's frame rate.
        """
        with wave.open(file, 'rb') as f:
            framerate = f.getframerate()
            frames = f.readframes(f.getnframes())
        # np.fromstring is deprecated for binary input; frombuffer is the
        # replacement. The copy keeps the result writeable, as before.
        return np.frombuffer(frames, dtype=np.int16).copy(), framerate


    def split_segments(raw, seg_len=seg):
        """Split ``raw`` into consecutive, non-overlapping full-length windows.

        A trailing remainder shorter than ``seg_len`` is dropped, so an input
        shorter than ``seg_len`` yields an empty list.

        Args:
            raw: 1-D sequence of samples.
            seg_len: Window length in samples; must be positive.

        Returns:
            A list of slices of ``raw``, each exactly ``seg_len`` long.

        Raises:
            ValueError: If ``seg_len`` is not positive.
        """
        if seg_len <= 0:
            raise ValueError('seg_len must be positive')
        # The earlier loop tested ``end - start == seg``, which is always true,
        # so it never stopped and emitted short/empty slices past the end.
        last_start = len(raw) - seg_len
        return [raw[start:start + seg_len]
                for start in range(0, last_start + 1, seg_len)]


    def collect_files(data_dir=DATA_DIR, labels=LABELS):
        """Return ``(file_path, label_index)`` for every file under each label.

        Args:
            data_dir: Directory containing one sub-directory per label.
            labels: Label names; a label's position is its class index.

        Returns:
            A list of ``(path, class_index)`` tuples.
        """
        file_glob = []
        for idx, cls_fold in enumerate(tqdm(labels)):
            cls_base = os.path.join(data_dir, cls_fold)
            # os.listdir order is arbitrary; sort so runs are reproducible.
            files = sorted(os.listdir(cls_base))
            print('{} train num:'.format(cls_fold), len(files))
            file_glob.extend((os.path.join(cls_base, pt), idx) for pt in files)
        return file_glob


    def build_dataset(file_glob, seg_len=seg, n_class=N_CLASS):
        """Turn ``(path, class_index)`` pairs into ``(segment, one_hot)`` pairs.

        Args:
            file_glob: Iterable of ``(wav_path, class_index)`` tuples.
            seg_len: Segment length in samples.
            n_class: Length of the one-hot label vector.

        Returns:
            A list with one ``(segment, one_hot_label)`` tuple per full segment.
        """
        data = []
        for file, lbl in tqdm(file_glob):
            raw, _ = get_wave_norm(file)
            for x in split_segments(raw, seg_len):
                one_hot = np.zeros(n_class)
                one_hot[lbl] = 1
                data.append((x, one_hot))
        return data


    def main():
        """Build the segment dataset from ``DATA_DIR`` and pickle it."""
        file_glob = collect_files()
        print('done.')
        data = build_dataset(file_glob)
        with open('./data.pkl', 'wb') as f:
            pkl.dump(data, f)


    if __name__ == '__main__':
        main()

    2. 设置训练数据:

    # Step 2: load the pickled records and turn them into shuffled arrays.
    import os
    import pickle as pkl
    import wave

    import numpy as np
    from keras.layers import (
        Input, Dense, Dropout, BatchNormalization, Conv2D, MaxPooling2D,
        AveragePooling2D, concatenate, Activation, ZeroPadding2D, LSTM,
    )
    from keras.layers import add, Flatten, Reshape
    from keras.metrics import top_k_categorical_accuracy
    from keras.models import Model
    from keras.models import load_model
    from keras.preprocessing.image import ImageDataGenerator
    from keras.utils import plot_model

    LABELS = ['awake', 'diaper', 'hug', 'hungry', 'sleepy', 'uncomfortable']
    N_CLASS = len(LABELS)

    with open('./data.pkl', 'rb') as f:
        raw_data = pkl.load(f)

    # Each record unpacks into a (sample, label) pair; keep the two halves in
    # parallel lists.
    train_x = [sample for sample, _ in raw_data]
    train_y = [target for _, target in raw_data]

    # Re-seeding before each shuffle applies the same permutation to both
    # lists, so samples and labels stay aligned.
    for sequence in (train_x, train_y):
        np.random.seed(5)
        np.random.shuffle(sequence)

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    3. 搭建LSTM模型:

    # Step 3: build a stacked-LSTM classifier and train it on data.pkl.
    import os
    import pickle as pkl
    import wave

    import keras
    import keras.backend as K
    import numpy as np
    from keras import layers
    from keras import regularizers
    from keras.models import Sequential

    LABELS = ['awake', 'diaper', 'hug', 'hungry', 'sleepy', 'uncomfortable']
    N_CLASS = len(LABELS)

    # NOTE: pickle executes arbitrary code on load; only open a data.pkl that
    # you generated yourself.
    with open('./data.pkl', 'rb') as f:
        raw_data = pkl.load(f)

    # The data-preparation step dumps a list of (segment, one_hot_label)
    # tuples, so indexing it with 'x'/'y' raised TypeError. The dict layout
    # {'x': ..., 'y': ...} is still accepted for older pickles.
    if isinstance(raw_data, dict):
        samples, targets = raw_data['x'], raw_data['y']
    else:
        samples = [x for x, _ in raw_data]
        targets = [y for _, y in raw_data]

    # Add a trailing feature axis: (n_segments, seg) -> (n_segments, seg, 1).
    train_x = np.expand_dims(np.array(samples), axis=-1)
    train_y = np.array(targets)

    # `seg` was never defined in this script; take the segment length from the
    # data itself so it always matches what was pickled.
    seg = train_x.shape[1]

    # Shuffle once with a single permutation so samples and labels stay
    # aligned. This matters because validation_split takes the LAST fraction
    # of the arrays before fit() does its own shuffling.
    np.random.seed(5)
    order = np.random.permutation(len(train_x))
    train_x = train_x[order]
    train_y = train_y[order]

    model = Sequential()
    model.add(layers.LSTM(128, input_shape=(seg, 1), dropout=0.5,
                          return_sequences=True))
    model.add(layers.LSTM(128, dropout=0.5, return_sequences=True))
    model.add(layers.LSTM(64))
    model.add(layers.Dense(N_CLASS, activation="softmax"))
    model.summary()

    # The lowercase `keras.optimizers.adam` alias is not available in current
    # Keras releases; the `Adam` class works across versions.
    adam = keras.optimizers.Adam(2e-4)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])

    # Train model on dataset
    batch_size = 64
    model.fit(x=train_x, y=train_y, batch_size=batch_size, epochs=200,
              validation_split=0.1, shuffle=True)
    model.save('my_model.h5')

    最终结果:

    有需求的大佬欢迎加入我的接单群,需求详情请群里戳群主

    获取数据集:

    关注我的公众号——可达鸭的深度学习教程,在公众号内回复“婴儿啼哭”即可获取数据集。

    Processed: 0.012, SQL: 9