Here we use TensorFlow to build a three-layer neural network. The overall structure of the model is LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX; note that the final layer is a SOFTMAX layer rather than a SIGMOID layer. The first step is to create placeholders for the input X and the labels Y, so that during forward propagation the training data can be fed directly into the TensorFlow computation graph.
def create_placeholders(n_x, n_y):
    """
    Arguments:
    n_x -- size of the input vector
    n_y -- number of output classes
    """
    X = tf.placeholder(tf.float32, [n_x, None], name="X")
    Y = tf.placeholder(tf.float32, [n_y, None], name="Y")
    return X, Y
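As a quick sanity check, the sketch below (purely illustrative; it assumes the 12288 input features and 6 output classes used by initialize_parameters further down) creates the placeholders and prints them. The second dimension stays None because the number of examples fed in can vary.

import tensorflow as tf

# Illustrative sizes only: 12288 = 64*64*3 flattened image features, 6 output classes
X, Y = create_placeholders(12288, 6)
print("X =", X)   # Tensor("X:0", shape=(12288, ?), dtype=float32)
print("Y =", Y)   # Tensor("Y:0", shape=(6, ?), dtype=float32)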
The second task is to initialize the parameters of the neural network with TensorFlow.

def initialize_parameters():
    # Set the TensorFlow random seed
    tf.set_random_seed(1)
    # Use TensorFlow's built-in xavier_initializer to initialize the weights W
    W1 = tf.get_variable("W1", [25, 12288], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    # Initialize the biases b to zeros
    b1 = tf.get_variable("b1", [25, 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [12, 25], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable("b2", [12, 1], initializer=tf.zeros_initializer())
    W3 = tf.get_variable("W3", [6, 12], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable("b3", [6, 1], initializer=tf.zeros_initializer())

    parameters = {"W1": W1, "b1": b1,
                  "W2": W2, "b2": b2,
                  "W3": W3, "b3": b3}
    return parameters
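A minimal sketch (not part of the original code) to confirm the layer dimensions: the network maps 12288 input features to 25, then 12, then 6 units, so each weight matrix has shape (units in this layer, units in the previous layer).

import tensorflow as tf
from tensorflow.python.framework import ops

ops.reset_default_graph()              # start from an empty graph so variable names don't clash
parameters = initialize_parameters()   # defined above
for name in ["W1", "b1", "W2", "b2", "W3", "b3"]:
    # Shapes are known statically, so no session is needed
    print(name, parameters[name].shape)  # e.g. W1 (25, 12288), b1 (25, 1), ...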
The third task is to implement forward propagation of the network with TensorFlow.

def forward_propagation(X, parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = tf.add(tf.matmul(W1, X), b1)    # first linear layer
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)   # second linear layer
    A2 = tf.nn.relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)   # third linear layer (logits)
    return Z3
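Note that forward_propagation stops at Z3, the raw linear output (logits) of the last layer; the softmax itself is applied inside the cost function in the next step. A small sketch (illustrative only, reusing the helpers defined above with the assumed 12288/6 sizes) to check that Z3 produces one row of scores per class:

import tensorflow as tf
from tensorflow.python.framework import ops

ops.reset_default_graph()
X, Y = create_placeholders(12288, 6)
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
print("Z3 =", Z3)   # shape (6, ?): 6 class scores per example, examples along the columns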
The fourth task is to compute the network's cost with TensorFlow.

def compute_cost(Z3, Y):
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    return cost
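The transposes are needed because tf.nn.softmax_cross_entropy_with_logits expects tensors of shape (number of examples, number of classes), whereas Z3 and Y are laid out as (number of classes, number of examples). The sketch below (made-up data, illustrative only) wires the pieces together and evaluates the cost on a tiny random batch just to confirm the graph runs end to end:

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

ops.reset_default_graph()
X, Y = create_placeholders(12288, 6)
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
cost = compute_cost(Z3, Y)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_batch = np.random.randn(12288, 4)          # 4 random "examples"
    y_batch = np.eye(6)[:, [0, 2, 1, 3]]         # 4 one-hot labels, shape (6, 4)
    print("cost =", sess.run(cost, feed_dict={X: x_batch, Y: y_batch}))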
With all of these pieces in place, we can now assemble the complete model.

def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True):
    ops.reset_default_graph()    # reset the computation graph to its default empty state
    tf.set_random_seed(1)
    seed = 3
    (n_x, m) = X_train.shape     # (n_x: number of features, m: number of training examples)
    n_y = Y_train.shape[0]
    costs = []

    # Create the placeholders
    X, Y = create_placeholders(n_x, n_y)
    # Initialize the parameters
    parameters = initialize_parameters()
    # Build the forward-propagation op
    Z3 = forward_propagation(X, parameters)
    # Build the cost op
    cost = compute_cost(Z3, Y)
    # Build backward propagation: specify the optimization algorithm, learning rate and
    # cost function; here we use the Adam algorithm
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    # Define the initialization op
    init = tf.global_variables_initializer()

    # Start a TensorFlow session
    with tf.Session() as sess:
        # Run the initialization op
        sess.run(init)
        # Train for the number of epochs given by num_epochs
        for epoch in range(num_epochs):
            epoch_cost = 0.
            num_minibatches = int(m / minibatch_size)  # how many mini-batches there are
            seed = seed + 1
            # Split the training set into mini-batches
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            # Loop over every mini-batch
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                # The whole graph is executed: forward propagation, backward propagation,
                # and finally the parameter update
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches

            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # Plot the cost curve
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Fetch the trained parameters from the graph
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Compute the prediction accuracy on the training set and on the test set
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters

Running the network above locally produces the cost curve shown below.
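Finally, one practical note: sess.run(parameters) converts the trained variables into plain NumPy arrays, so they can be reused for predictions outside the TensorFlow session. Below is a minimal sketch of such a helper (predict_with_parameters is not part of the original code, purely an illustration) that re-implements the same forward pass in NumPy and returns the predicted class index for each example.

import numpy as np

def predict_with_parameters(X_new, parameters):
    """Forward pass with the trained NumPy parameters; X_new has shape (12288, m)."""
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]
    Z1 = np.dot(W1, X_new) + b1
    A1 = np.maximum(Z1, 0)            # ReLU
    Z2 = np.dot(W2, A1) + b2
    A2 = np.maximum(Z2, 0)            # ReLU
    Z3 = np.dot(W3, A2) + b3
    # argmax over the class dimension; the softmax is monotonic, so it can be skipped here
    return np.argmax(Z3, axis=0)

# e.g. predictions = predict_with_parameters(X_test, parameters)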