# 非线性回归应用(Logistic Regression Application)
# 理论+实际应用整合到一起链接
import numpy as np
import random# 一个函数为梯度下降的算法
def GradientDescent(x, y, theta, alpha, m, numInterations):
    """Fit linear-model parameters with batch gradient descent.

    Each iteration applies the update rule
    ``theta = theta - alpha * (x^T (x.theta - y)) / m`` and prints the
    current squared-error cost.

    Args:
        x: Training examples as a NumPy array, one example per row
            (the driver script passes an (m, n) matrix).
        y: Target value for each example.
        theta: Initial parameter vector to be learned.
        alpha: Learning rate (step size of each update).
        m: Number of examples (NOT the number of features); used to
            average the cost and the gradient.
        numInterations: Number of update steps to perform.

    Returns:
        The parameter vector after ``numInterations`` updates. The
        ``theta`` argument itself is not modified in place.
    """
    # Transpose once up front; it is reused unchanged by every iteration.
    xTrans = x.transpose()
    for i in range(0, numInterations):
        # Must be recomputed inside the loop: theta changes on every update,
        # so the prediction (y_hat) changes with it.
        hypothesis = np.dot(x, theta)
        # Residual: prediction minus the actual target.
        loss = hypothesis - y
        # Avg cost per example. The 2 in 2*m doesn't really matter here, but
        # it is included to be consistent with the gradient. This is a simple
        # squared-error cost; it should shrink from one iteration to the next
        # as long as alpha is small enough.
        cost = np.sum(loss**2)/(2*m)
        print('Interation:%d|cost:%f'%(i,cost))
        # Avg gradient per example (dividing by m takes the mean).
        gradient = np.dot(xTrans, loss)/m
        # Update rule: theta = theta - alpha * mean((h(x) - y) * x)
        theta = theta - alpha*gradient
    return theta


# A function that generates data for testing the fit
def genData(numPoints, bias, variance):
    """Generate noisy points along a straight line for testing the fit.

    Args:
        numPoints: Number of examples (rows) to generate.
        bias: Constant offset added to every target value.
        variance: Scale of the noise; each target gets a uniform random
            draw from ``random.uniform(0, 1)`` multiplied by this value.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(numPoints, 2)`` with a
        constant 1 in column 0 (bias feature) and the row index in column 1,
        and ``y`` has shape ``(numPoints,)`` with
        ``y[i] = (i + bias) + noise``.
    """
    x = np.zeros(shape=(numPoints, 2))  # numPoints rows, 2 columns
    y = np.zeros(shape=(numPoints))
    # Basically a straight line.
    for i in range(0, numPoints):
        # Bias feature.
        x[i][0] = 1
        x[i][1] = i
        # Target variable; random.uniform(0, 1) yields a float between 0 and 1.
        y[i] = (i+bias)+random.uniform(0,1)*variance
    return x, y


# generate 100 points with a bias of 25 and 10 variance as a bit of noise
# genData returns two values; any two names may be used here, they are
# assigned positionally from the returned (x, y) pair.
x,y = genData(100,25,10)
# print(x)
# print(y)
m,n = np.shape(x)  # m = number of rows of x, n = number of columns
a = np.shape(y)  # y is one-dimensional, so its shape holds only the row count
# print(m,n)  # (100, 2)
# print(a)  # (100,)
# Number of gradient-descent updates. This assignment must be on its own
# line; GradientDescent below needs the name to be defined.
numInterations = 100000
# Learning rate, chosen between 0 and 1. More refined schemes start with a
# larger alpha and reduce it in later iterations.
alpha = 0.0005
# Initialise theta to [1. 1.], used here simply as the starting point for
# the descent.
theta = np.ones(n)
theta = GradientDescent(x,y,theta,alpha,m,numInterations)
# Expected to be roughly [30 1]: slope 1, and intercept = bias 25 plus the
# mean noise of about 5 (uniform(0, 1) * 10).
print(theta)
# The resulting theta can be used to compute predictions for new examples.
# This gradient-descent method is used in regression algorithms and in
# neural networks alike.