案例目的
通过已标注的数据,训练出一个模型,用来预测患者是否患有乳腺癌。
该问题属于二分类问题,所以模型输出层可以使用Sigmoid激活函数,损失函数使用二元交叉熵(BCE)。
代码逻辑步骤
- 读取数据
- 训练集与测试集拆分
- 数据标准化
- 数据转化为PyTorch张量
- label维度转换
- 定义模型
- 定义损失计算函数
- 定义优化器
- 定义梯度下降函数
- 模型训练(正向传播、计算损失、反向传播、参数更新、梯度清空)
- 模型测试
- 精度计算
代码实现
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset: every column except the last is a feature,
# the last column is the label.
df = pd.read_csv('/Volumes/Sophia/机器学习/day03/code/breast_cancer.csv')
X = df[df.columns[0:-1]].values
Y = df[df.columns[-1]].values

# Split into training and test sets (80% / 20%) with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Standardize the features. The scaler is fitted on the training set only and
# then reused for the test set, so no test-set statistics leak into
# preprocessing and both sets are scaled with the same mean/variance.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert the NumPy arrays to float32 PyTorch tensors.
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
Y_train = torch.from_numpy(Y_train.astype(np.float32))
Y_test = torch.from_numpy(Y_test.astype(np.float32))

# Reshape the 1-D label vectors to column vectors of shape (n_samples, 1).
Y_train = Y_train.view(Y_train.shape[0], -1)
Y_test = Y_test.view(Y_test.shape[0], -1)

# Define the model
class Model(torch.nn.Module):
    """Single linear layer followed by a sigmoid (logistic regression)."""

    def __init__(self, n_input_features):
        """Create the linear layer mapping ``n_input_features`` inputs to 1 output."""
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        y = torch.sigmoid(self.linear(x))
        return y


# Number of input features, taken from the second dimension of the training data.
n_features = X_train.shape[1]
# Build the model with one input per feature column.
model = Model(n_input_features=n_features)

# Loss function: binary cross-entropy on the model's sigmoid outputs.
loss = torch.nn.BCELoss()

# Optimizer: plain stochastic gradient descent with a fixed learning rate.
# NOTE: the spelling `optimzier` is kept because other parts of the script
# refer to the optimizer by this name.
learning_rate = 1e-3
optimzier = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
# Define the gradient-descent step
def gradient_descent():
    """Run one training step on the full training set.

    Performs the forward pass on ``X_train``, computes the loss against
    ``Y_train``, back-propagates, applies the optimizer update and then
    clears the gradients so they do not accumulate into the next step.

    Returns:
        A tuple ``(loss_value, parameters)``: the loss tensor computed in
        this step and a list of the model's parameters.
    """
    pre_y = model(X_train)
    step_loss = loss(pre_y, Y_train)
    step_loss.backward()
    optimzier.step()
    optimzier.zero_grad()
    return step_loss, list(model.parameters())


# Model training
# Run 500 gradient-descent steps, printing the loss and the current
# parameters every 50 steps to monitor progress.
for i in range(500):
    l, pa = gradient_descent()
    if i % 50 == 0:
        print(l, pa)

# Model testing
# Pick one random test sample and print the model's output next to its label.
n_test_samples = X_test.shape[0]
index = np.random.randint(n_test_samples)
pre = model(X_test[index])
print(pre, Y_test[index])

# Compute the model accuracy
# Predict on the whole test set. Gradients are not needed for evaluation,
# so autograd tracking is disabled for the forward pass.
with torch.no_grad():
    # Round the sigmoid outputs to the nearest integer to get 0/1 predictions.
    pres_y = model(X_test).round()

# 1 where the prediction equals the label, 0 otherwise.
result = (pres_y == Y_test).int()

# Accuracy = number of correct predictions / number of test labels.
ac = result.sum().item() / result.numel()
print(ac)