假定要拟合目标曲线: $y = \sin 2\pi x$
import numpy as np
import scipy as sp
from scipy.optimize import leastsq  # least-squares solver
import matplotlib.pyplot as plt


def real_func(x):
    """Return the target function sin(2*pi*x) evaluated at ``x``."""
    return np.sin(2 * np.pi * x)


def fit_func(p, x):
    """Evaluate the polynomial with coefficients ``p`` at ``x``.

    ``p`` is handed to ``np.poly1d``, so the coefficients are ordered from
    the highest power down to the constant term.
    """
    return np.poly1d(p)(x)


def residuals_func(p, x, y):
    """Return the residuals ``fit_func(p, x) - y`` minimized by ``leastsq``."""
    return fit_func(p, x) - y


# Ten sample points used for fitting, and a dense grid used for plotting.
x = np.linspace(0, 1, 10)
x_points = np.linspace(0, 1, 1000)

# Target values at the sample points plus Gaussian noise (mean 0, std 0.1),
# drawn in a single vectorized call.
y_ = real_func(x)
y = y_ + np.random.normal(0, 0.1, size=y_.shape)


def fitting(M=0):
    """Fit a degree-``M`` polynomial to the noisy samples and plot the result.

    Args:
        M: Degree of the polynomial; ``M + 1`` coefficients are fitted.

    Returns:
        The tuple returned by ``leastsq``: element 0 holds the fitted
        coefficients, element 1 is the solver's integer status flag.
    """
    # Random starting point for the M + 1 coefficients.
    p_init = np.random.rand(M + 1)
    # Least-squares fit against the module-level samples (x, y).
    p_lsq = leastsq(residuals_func, p_init, args=(x, y))
    print('Fitting Parameters:', p_lsq[0])

    # Visualize the true curve, the fitted curve and the noisy samples.
    plt.plot(x_points, real_func(x_points), label='real')
    plt.plot(x_points, fit_func(p_lsq[0], x_points), label='fitted curve')
    plt.plot(x, y, 'bo', label='noise')
    plt.legend()
    return p_lsq


# Polynomial of degree 1:
p_lsq_1 = fitting(M=1)

# Polynomial of degree 2:
p_lsq_2 = fitting(M=2)

# Polynomial of degree 3:
p_lsq_3 = fitting(M=3)

# Polynomial of degree 9:
p_lsq_9 = fitting(M=9)

# The degree-9 polynomial clearly overfits, so regularization is applied next.
# Regularization strength (lambda).
regularization = 0.0001


def residuals_func_regularization(p, x, y):
    """Return the residual vector for an L2-regularized least-squares fit.

    ``leastsq`` squares and sums the returned vector itself, so the data
    residuals are returned unsquared and the penalty is appended as
    ``sqrt(regularization) * p``. The minimized objective is therefore
    ``sum((fit_func(p, x) - y)**2) + regularization * sum(p**2)``.

    Args:
        p: Polynomial coefficients being optimized.
        x: Sample locations.
        y: Noisy target values at ``x``.

    Returns:
        1-D array of the data residuals followed by one penalty term per
        coefficient.
    """
    ret = fit_func(p, x) - y
    # Square root of the L2 penalty, so that leastsq's own squaring
    # reproduces regularization * p**2.
    penalty = np.sqrt(regularization) * np.asarray(p)
    return np.append(ret, penalty)


# Regularized least squares on a degree-9 polynomial (10 coefficients), so the
# result is comparable with the unregularized degree-9 fit in p_lsq_9.
p_init = np.random.rand(9 + 1)
p_lsq_regularization = leastsq(
    residuals_func_regularization, p_init, args=(x, y))

plt.plot(x_points, real_func(x_points), label='real')
plt.plot(x_points, fit_func(p_lsq_9[0], x_points), label='fitted curve')
plt.plot(
    x_points,
    fit_func(p_lsq_regularization[0], x_points),
    label='regularization')
plt.plot(x, y, 'bo', label='noise')
plt.legend()

# Regularization clearly mitigates the overfitting problem.