Contents
1. One-hot encoding of the true labels
2. Predictions
3. Requirement on the objective
4. Softmax: converting confidences O_i into matching probabilities ŷ_i
5. Cross-entropy as the loss function
6. Code implementation
1. One-hot encoding of the true labels:
y is the ground-truth vector: exactly one entry is 1 and all others are 0, and the position of that 1 marks the element's true class. For n classes, label i is encoded as y = [0, ..., 0, 1, 0, ..., 0] with the 1 at position i.
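As a minimal sketch (the class indices below are invented for illustration), PyTorch's F.one_hot produces exactly this encoding:

import torch
import torch.nn.functional as F

labels = torch.tensor([2, 0, 1])        # hypothetical true classes for 3 samples
y = F.one_hot(labels, num_classes=3)    # one row per sample, a single 1 per row
print(y)
# tensor([[0, 0, 1],
#         [1, 0, 0],
#         [0, 1, 0]])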
2. Predictions:
The model produces a confidence score O_i for every class; the predicted class is the one with the largest confidence, i.e. argmax_i O_i.
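A one-line sketch of that rule, with invented confidence scores:

import torch

O = torch.tensor([1.2, 0.3, 2.5])    # invented confidences O_i for 3 classes
pred = O.argmax().item()             # predicted class: 2, the largest O_i
print(pred)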
3. Requirement on the objective:
The confidence O_y of the correct class y must be much larger than the confidence O_i of every incorrect class, so that the correct class stands out from the wrong ones by a clear margin (formally, O_y ≥ O_i + Δ for some margin Δ > 0).
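A quick numeric illustration (confidence values invented): after the softmax introduced next, a larger gap between O_y and the other confidences translates into a probability much closer to 1:

import torch

small_gap = torch.tensor([2.0, 1.5, 1.0])    # O_y only slightly above the rest
large_gap = torch.tensor([5.0, 1.5, 1.0])    # O_y far above the rest
print(torch.softmax(small_gap, dim=0))       # ~[0.51, 0.31, 0.19]
print(torch.softmax(large_gap, dim=0))       # ~[0.95, 0.03, 0.02]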
4. Softmax: converting confidences O_i into matching probabilities ŷ_i:
ŷ_i = exp(O_i) / Σ_j exp(O_j)
ŷ is an n-dimensional vector whose entries are non-negative and sum to 1; ŷ_i is the probability that the element belongs to class i.
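A small check of those two properties, on an invented 2 x 3 batch of confidences:

import torch

O = torch.tensor([[1.0, -2.0, 3.0],
                  [0.5, 0.5, 0.5]])    # invented confidence scores
y_hat = torch.softmax(O, dim=1)
print(y_hat)                           # every entry lies in (0, 1)
print(y_hat.sum(dim=1))                # tensor([1., 1.]) -- each row sums to 1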
5. Cross-entropy as the loss function:
L(y, ŷ) = -Σ_i y_i log ŷ_i
L measures the gap between the true distribution y and the predicted distribution ŷ. Because y is one-hot, the sum collapses to -log ŷ_y: a classification problem does not care how probability mass is spread over the incorrect classes, only how large the prediction for the correct class is.
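A worked example with invented probabilities: since y is one-hot, the loss reduces to -log of the probability assigned to the true class, and the other entries never enter it:

import math

y_hat = [0.1, 0.3, 0.6]    # invented predicted probabilities
true_class = 2             # the one-hot y selects only index 2
loss = -math.log(y_hat[true_class])
print(loss)                # ~0.511; unchanged no matter how 0.1 and 0.3 are split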
6. Code implementation:
import os

import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision import transforms
from torch.utils import data
from d2l import torch as d2l

# Work around the duplicate-OpenMP-runtime error some Windows installs hit
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
batch_size = 256
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(root="../data", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(root="../data", train=False, transform=trans, download=True)
print(len(mnist_train))
def get_fashion_mnist_labels(labels):
    """Return the text labels for the Fashion-MNIST dataset."""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

def show_fashion_mnist(images, labels):
    """Plot a row of images with their labels."""
    d2l.use_svg_display()
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()

train_data, train_targets = next(iter(data.DataLoader(mnist_train, batch_size=18)))
show_fashion_mnist(train_data[0:10], train_targets[0:10])
num_inputs = 784    # each 28 x 28 image is flattened into a 784-vector
num_outputs = 10    # ten clothing classes

W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition    # broadcasting: every row is non-negative and sums to 1

def net(X):
    return softmax(torch.matmul(X.reshape(-1, num_inputs), W) + b)
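One caveat worth noting (not part of the original code): X.exp() overflows for large confidences. A common stabilization, sketched below, subtracts each row's maximum before exponentiating, which leaves the result mathematically unchanged:

def stable_softmax(X):
    # exp(x - c) / sum(exp(x - c)) == exp(x) / sum(exp(x)) for any constant c,
    # so shifting by the row maximum changes nothing but avoids overflow in exp()
    X = X - X.max(dim=1, keepdim=True).values
    X_exp = X.exp()
    return X_exp / X_exp.sum(dim=1, keepdim=True)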
# gather picks out, for each row of y_hat, the probability of the true class in y
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))    # tensor([[0.1000], [0.5000]])

def cross_entropy(y_hat, y):
    return -torch.log(y_hat.gather(1, y.view(-1, 1)))
def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

num_epochs, lr = 10, 0.1
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # clear old gradients before backpropagating
            if params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is not None:
                optimizer.step()
            else:
                d2l.sgd(params, lr, batch_size)    # plain minibatch SGD
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
# grab one test batch and compare true vs. predicted labels
for X, y in test_iter:
    break

true_labels = get_fashion_mnist_labels(y.numpy())
pred_labels = get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
show_fashion_mnist(X[0:9], titles[0:9])