cs231n作业2 双层神经网络

双层神经网络

在这里插入图片描述
在这里插入图片描述
我们选用ReLU函数和softmax函数:
在这里插入图片描述
步骤:
1、LOSS损失函数(前向传播)与梯度(后向传播)计算

Forward: 计算score,再根据score计算loss
Backward:分别对W2、b2、W1、b1求梯度

def loss(self, X, y=None, reg=0.0):
    """Compute softmax loss and gradients for the two-layer network.

    The forward pass is affine -> ReLU -> affine; the class scores are then
    fed to a softmax cross-entropy loss with L2 regularization on W1 and W2.

    Args:
        X: 2-D input array of shape (N, D).
        y: Integer class labels, one per row of X, or None to only obtain
            the class scores.
        reg: L2 regularization strength.

    Returns:
        If y is None, the score matrix with one row per input.
        Otherwise a tuple (loss, grads) where loss is a scalar and grads maps
        'W1', 'b1', 'W2', 'b2' to gradients shaped like the parameters.
    """
    # Unpack parameters from the params dictionary.
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    N, _ = X.shape

    # Forward pass: hidden activations, then class scores.
    h1 = np.maximum(0, np.dot(X, W1) + b1)
    scores = np.dot(h1, W2) + b2
    if y is None:
        return scores

    y = np.asarray(y)
    rows = np.arange(N)

    # Subtract the row-wise max before exponentiating: softmax is invariant
    # to this shift and it keeps np.exp from overflowing on large scores.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_s = np.exp(shifted)
    sum_exp = np.sum(exp_s, axis=1, keepdims=True)

    # Cross-entropy: -score[correct] + log(sum(exp(scores))), averaged over N.
    # np.sum (not the builtin sum) keeps the loss a scalar.
    data_loss = np.sum(np.log(sum_exp)) - np.sum(shifted[rows, y])
    loss = data_loss / N + 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

    # Backward pass.
    grads = {}

    # Gradient w.r.t. the scores: softmax probabilities minus one-hot labels.
    dscores = exp_s / sum_exp
    dscores[rows, y] -= 1
    dscores /= N

    grads['W2'] = np.dot(h1.T, dscores) + reg * W2
    grads['b2'] = np.sum(dscores, axis=0)

    # Back-propagate through the ReLU: gradient flows only where h1 > 0.
    dh1 = np.dot(dscores, W2.T)
    dh1_relu = (h1 > 0) * dh1

    grads['W1'] = np.dot(X.T, dh1_relu) + reg * W1
    grads['b1'] = np.sum(dh1_relu, axis=0)

    return loss, grads

2、训练函数 (迭代过程:forward–>backward–>update–>forward–>backward->update……)

def train(self, X, y, X_val, y_val, learning_rate=1e-3, learning_rate_decay=0.95,
          reg=5e-6, num_iters=100, batch_size=200, verbose=False):
    """Train the network with minibatch stochastic gradient descent.

    Each iteration samples a minibatch (with replacement), calls self.loss
    to get the loss and gradients, and applies a plain SGD step to
    'W1', 'b1', 'W2', 'b2' in self.params. Once per epoch the accuracy on
    the current minibatch and on the validation set is recorded and the
    learning rate is decayed.

    Args:
        X: Training data; rows are indexed by sample.
        y: Training labels, indexable by the same sample indices as X.
        X_val: Validation data passed to self.predict.
        y_val: Validation labels compared against self.predict(X_val).
        learning_rate: Initial SGD step size.
        learning_rate_decay: Factor applied to the learning rate each epoch.
        reg: Regularization strength forwarded to self.loss.
        num_iters: Number of SGD iterations.
        batch_size: Number of samples drawn per iteration.
        verbose: If True, print the loss every 100 iterations.

    Returns:
        Dict with 'loss_history', 'train_acc_history', 'val_acc_history'.
    """
    num_train = X.shape[0]
    # Floor division keeps this an integer on Python 3 as well, so the
    # "it % iterations_per_epoch == 0" epoch check below fires as intended.
    iterations_per_epoch = max(num_train // batch_size, 1)

    loss_history = []
    train_acc_history = []
    val_acc_history = []

    for it in range(num_iters):
        # Sample a minibatch of indices (with replacement).
        mask = np.random.choice(num_train, batch_size, replace=True)
        X_batch = X[mask]
        y_batch = y[mask]

        # Forward + backward on the current minibatch.
        loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
        loss_history.append(loss)

        # Vanilla SGD update, applied in place.
        for name in ('W1', 'b1', 'W2', 'b2'):
            self.params[name] += -learning_rate * grads[name]

        if verbose and it % 100 == 0:
            print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        # Every epoch: record accuracies and shrink the learning rate.
        if it % iterations_per_epoch == 0:
            train_acc = (self.predict(X_batch) == y_batch).mean()
            val_acc = (self.predict(X_val) == y_val).mean()
            train_acc_history.append(train_acc)
            val_acc_history.append(val_acc)

            learning_rate *= learning_rate_decay

    return {
        'loss_history': loss_history,
        'train_acc_history': train_acc_history,
        'val_acc_history': val_acc_history,
    }

3、预测函数
4、参数训练

用于机器视觉识别的卷积神经网络

多层全连接神经网络

两个基本的layer:

def affine_forward(x, w, b):
    """Forward pass of an affine (fully connected) layer.

    Each sample in x is flattened to a row vector before the matrix product.

    Args:
        x: Input array whose first axis indexes samples.
        w: Weight matrix; its first dimension must match the flattened
            per-sample size of x.
        b: Bias, broadcast-added to the product.

    Returns:
        Tuple (out, cache) where out = x_flat.dot(w) + b and
        cache = (x, w, b) for the backward pass.
    """
    num_samples = x.shape[0]
    x_rows = x.reshape(num_samples, -1)  # flatten each sample into one row
    out = np.dot(x_rows, w) + b
    cache = (x, w, b)  # the output itself is not needed for backprop
    return out, cache


def affine_backward(dout, cache):
    """Backward pass of an affine layer.

    Args:
        dout: Upstream gradient with one row per sample.
        cache: Tuple (x, w, b) as returned by affine_forward.

    Returns:
        Tuple (dx, dw, db): dx has the shape of x, dw the shape of w, and
        db is the column-wise sum of dout with shape (1, dout.shape[1]).
    """
    x, w, b = cache
    dx = np.dot(dout, w.T).reshape(x.shape)
    x_rows = x.reshape(x.shape[0], -1)
    dw = np.dot(x_rows.T, dout)
    db = np.sum(dout, axis=0, keepdims=True)
    return dx, dw, db


def relu_forward(x):
    """Forward pass of a ReLU layer.

    Args:
        x: Input array of any shape.

    Returns:
        Tuple (out, cache) where out = max(0, x) element-wise and cache = x.
    """
    out = np.maximum(0, x)
    cache = x
    return out, cache


def relu_backward(dout, cache):
    """Backward pass of a ReLU layer.

    Args:
        dout: Upstream gradient, same shape as the forward input.
        cache: The forward input x, as returned by relu_forward.

    Returns:
        dx: dout where x > 0, and zero elsewhere.
    """
    x = cache
    # The gradient passes through only where the unit was active.
    dx = dout * (x > 0)
    return dx

构建一个Sandwich的层:

def affine_relu_forward(x, w, b):
    """Apply an affine layer followed by a ReLU.

    Args:
        x: Input to the affine layer.
        w: Affine weights.
        b: Affine bias.

    Returns:
        Tuple (out, cache): the ReLU output and a pair
        (affine cache, ReLU cache) for the backward pass.
    """
    pre_activation, affine_cache = affine_forward(x, w, b)
    activated, activation_cache = relu_forward(pre_activation)
    return activated, (affine_cache, activation_cache)


def affine_relu_backward(dout, cache):
    """Backward pass for the affine-ReLU sandwich layer.

    Args:
        dout: Upstream gradient of the ReLU output.
        cache: Pair (affine cache, ReLU cache) from affine_relu_forward.

    Returns:
        Tuple (dx, dw, db) produced by affine_backward.
    """
    affine_cache, activation_cache = cache
    d_pre_activation = relu_backward(dout, activation_cache)
    grad_x, grad_w, grad_b = affine_backward(d_pre_activation, affine_cache)
    return grad_x, grad_w, grad_b

FullyConnectedNet:

class FullyConnectedNet(object):
    """Fully connected network with an arbitrary number of hidden layers.

    As implemented below, each hidden layer is
    affine -> [batchnorm] -> ReLU -> [dropout], followed by a final affine
    layer whose scores are passed to softmax_loss (defined elsewhere).
    Parameters live in self.params under keys 'W1', 'b1', ... and, when
    batchnorm is enabled, 'gamma1', 'beta1', ... for the hidden layers.
    """

    def __init__(self, hidden_dims, input_dim=3*32*32, num_classes=10,
                 dropout=0, use_batchnorm=False, reg=0.0,
                 weight_scale=1e-2, dtype=np.float32, seed=None):
        """Initialize the network parameters.

        Args:
            hidden_dims: Sequence of hidden layer sizes.
            input_dim: Size of the (flattened) input.
            num_classes: Number of output scores.
            dropout: Value stored as dropout 'p'; dropout is enabled only
                when it is greater than 0.
            use_batchnorm: Whether hidden layers use batch normalization.
            reg: L2 regularization strength.
            weight_scale: Standard deviation of the random weight init.
            dtype: NumPy dtype all parameters are cast to.
            seed: If not None, stored in dropout_param under 'seed'.
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        self.num_layers = 1 + len(hidden_dims)
        self.dtype = dtype
        self.params = {}

        # Sizes of every layer boundary: input, hidden..., output.
        # list() lets callers pass a tuple as well as a list.
        layers_dims = [input_dim] + list(hidden_dims) + [num_classes]

        for i in range(self.num_layers):
            fan_in, fan_out = layers_dims[i], layers_dims[i + 1]
            idx = str(i + 1)
            self.params['W' + idx] = weight_scale * np.random.randn(fan_in, fan_out)
            self.params['b' + idx] = np.zeros((1, fan_out))
            # The last (output) layer has no batchnorm.
            if self.use_batchnorm and i < len(hidden_dims):
                self.params['gamma' + idx] = np.ones((1, fan_out))
                self.params['beta' + idx] = np.zeros((1, fan_out))

        # Dropout configuration shared by every dropout layer.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # One bn_param dict per hidden layer (each holds its own running stats).
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for _ in range(self.num_layers - 1)]

        # Cast all parameters to the requested datatype.
        for k, v in list(self.params.items()):
            self.params[k] = v.astype(dtype)

    def loss(self, X, y=None):
        """Compute scores, or loss and gradients, for a minibatch.

        Args:
            X: Input data; cast to self.dtype before use.
            y: Labels, or None to run in test mode and return scores only.

        Returns:
            If y is None, the class scores. Otherwise a tuple (loss, grads)
            where grads has one entry per key used during the backward pass.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Batchnorm and dropout behave differently at train and test time,
        # so propagate the current mode to their parameter dicts.
        if self.dropout_param is not None:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        # Per-layer caches: affine, batchnorm, relu (or affine-relu), dropout.
        cache1, cache2, cache3, cache4 = {}, {}, {}, {}
        # out[i] is the input to layer i + 1; out[0] is the network input.
        out = {0: X}

        # Forward pass through the hidden layers.
        for i in range(self.num_layers - 1):
            idx = str(i + 1)
            w, b = self.params['W' + idx], self.params['b' + idx]
            if self.use_batchnorm:
                gamma, beta = self.params['gamma' + idx], self.params['beta' + idx]
                h, cache1[i] = affine_forward(out[i], w, b)
                normed, cache2[i] = batchnorm_forward(h, gamma, beta, self.bn_params[i])
                out[i + 1], cache3[i] = relu_forward(normed)
            else:
                out[i + 1], cache3[i] = affine_relu_forward(out[i], w, b)
            if self.use_dropout:
                out[i + 1], cache4[i] = dropout_forward(out[i + 1], self.dropout_param)

        # Final affine layer producing the class scores.
        last_idx = str(self.num_layers)
        W, b = self.params['W' + last_idx], self.params['b' + last_idx]
        scores, cache = affine_forward(out[self.num_layers - 1], W, b)

        if mode == 'test':
            return scores

        grads = {}
        data_loss, dscores = softmax_loss(scores, y)
        reg_loss = 0
        for i in range(self.num_layers):
            W_i = self.params['W' + str(i + 1)]
            reg_loss += 0.5 * self.reg * np.sum(W_i * W_i)
        loss = data_loss + reg_loss

        # Backward pass, starting from the final affine layer.
        dout = {}
        t = self.num_layers - 1
        dout[t], grads['W' + str(t + 1)], grads['b' + str(t + 1)] = \
            affine_backward(dscores, cache)

        # Walk the hidden layers from last to first; "layer" is 1-based.
        for layer in range(t, 0, -1):
            prev = layer - 1
            key = str(layer)
            if self.use_dropout:
                dout[layer] = dropout_backward(dout[layer], cache4[prev])
            if self.use_batchnorm:
                dbn = relu_backward(dout[layer], cache3[prev])
                dh, grads['gamma' + key], grads['beta' + key] = \
                    batchnorm_backward(dbn, cache2[prev])
                dout[prev], grads['W' + key], grads['b' + key] = \
                    affine_backward(dh, cache1[prev])
            else:
                dout[prev], grads['W' + key], grads['b' + key] = \
                    affine_relu_backward(dout[layer], cache3[prev])

        # Add the regularization gradient contribution.
        for i in range(self.num_layers):
            key = 'W' + str(i + 1)
            grads[key] += self.reg * self.params[key]

        return loss, grads

使用Solver来对神经网络进行优化求解

之后进行参数更新:

  1. SGD
  2. Momentum
  3. Nesterov Momentum
  4. RMSProp and Adam

批量规范化

在这里插入图片描述

BN层前向传播:
在这里插入图片描述

BN层反向传播:
在这里插入图片描述

def batchnorm_forward(x, gamma, beta, bn_param):
    """Forward pass of batch normalization.

    In 'train' mode the batch mean and (biased) variance normalize x and the
    running statistics are updated with an exponential moving average. In
    'test' mode the stored running statistics are used instead.

    Args:
        x: 2-D input array of shape (N, D).
        gamma: Scale applied to the normalized input.
        beta: Shift added after scaling.
        bn_param: Dict with key 'mode' ('train' or 'test') and optional
            'eps', 'momentum', 'running_mean', 'running_var'. The running
            statistics are written back into this dict.

    Returns:
        Tuple (out, cache). cache is None in test mode.

    Raises:
        ValueError: If the mode is neither 'train' nor 'test'.
    """
    mode = bn_param['mode']  # train and test use different statistics
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)

    _, num_features = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(num_features, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(num_features, dtype=x.dtype))

    cache = None
    if mode == 'test':
        # At test time, normalize with the accumulated running statistics.
        x_hat = (x - running_mean) / np.sqrt(running_var + eps)
        out = gamma * x_hat + beta
    elif mode == 'train':
        batch_mean = np.mean(x, axis=0, keepdims=True)  # [1, D]
        batch_var = np.var(x, axis=0, keepdims=True)    # [1, D]
        x_hat = (x - batch_mean) / np.sqrt(batch_var + eps)  # [N, D]
        out = gamma * x_hat + beta
        cache = (x_hat, gamma, beta, batch_mean, batch_var, x, eps)
        # Momentum-weighted moving average of the batch statistics.
        running_mean = momentum * running_mean + (1 - momentum) * batch_mean
        running_var = momentum * running_var + (1 - momentum) * batch_var
    else:
        raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

    # Store the updated running statistics back into bn_param.
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var

    return out, cache


def batchnorm_backward(dout, cache):
    """Backward pass of batch normalization.

    Args:
        dout: Upstream gradient of shape (N, D).
        cache: Tuple produced by batchnorm_forward in train mode.

    Returns:
        Tuple (dx, dgamma, dbeta); dgamma and dbeta have shape (1, D).
    """
    x_hat, gamma, _beta, batch_mean, batch_var, x, eps = cache
    num_samples, _ = x.shape

    dx_hat = dout * gamma                       # [N, D]
    centered = x - batch_mean                   # [N, D]
    inv_std = 1.0 / np.sqrt(batch_var + eps)    # [1, D]

    # Gradients flowing into the batch variance and batch mean.
    dvar = -0.5 * np.sum(dx_hat * centered, axis=0, keepdims=True) * inv_std**3
    dmean = -1.0 * np.sum(dx_hat * inv_std, axis=0, keepdims=True) - \
        2.0 * dvar * np.mean(centered, axis=0, keepdims=True)

    # dx is the sum of the direct path and the paths through var and mean.
    direct_term = dx_hat * inv_std
    var_term = 2.0/num_samples * dvar * centered
    dx = direct_term + var_term + 1.0/num_samples * dmean

    dgamma = np.sum(dout * x_hat, axis=0, keepdims=True)
    dbeta = np.sum(dout, axis=0, keepdims=True)

    return dx, dgamma, dbeta

Batch Normalization解决的一个重要问题就是梯度饱和。

Dropout

训练的时候以一定的概率来去掉每层的神经元:
在这里插入图片描述
可以防止过拟合。还可以理解为dropout是一个正则化的操作,他在每次训练的时候,强行让一些feature为0,这样提高了网络的稀疏表达能力。

def dropout_forward(x, dropout_param):
    """Forward pass of (inverted) dropout.

    Args:
        x: Input array of any shape.
        dropout_param: Dict with keys 'p' and 'mode' ('train' or 'test'),
            and an optional 'seed' used to seed NumPy's global RNG.

    Returns:
        Tuple (out, cache) where cache = (dropout_param, mask); mask is
        None in test mode.
    """
    p = dropout_param['p']
    mode = dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])

    mask, out = None, None
    if mode == 'train':
        # Each unit is kept when its random draw is below p. Dividing by p
        # here means the test-time output can be passed through unchanged.
        kept = np.random.rand(*x.shape) < p
        mask = kept / p
        out = x * mask
    elif mode == 'test':
        out = x

    cache = (dropout_param, mask)
    out = out.astype(x.dtype, copy=False)
    return out, cache


def dropout_backward(dout, cache):
    """Backward pass of (inverted) dropout.

    Args:
        dout: Upstream gradient.
        cache: Tuple (dropout_param, mask) from dropout_forward.

    Returns:
        dout * mask in train mode, dout unchanged in test mode, and None
        for any other mode.
    """
    dropout_param, mask = cache
    mode = dropout_param['mode']
    if mode == 'train':
        return dout * mask
    if mode == 'test':
        return dout
    return None

卷积神经网络

卷积层的前向传播与反向传播
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

def conv_forward_naive(x, w, b, conv_param):
    """Naive (loop-based) forward pass of a convolutional layer.

    Args:
        x: Input of shape (N, C, H, W).
        w: Filters of shape (F, C, HH, WW).
        b: Biases, indexed per filter.
        conv_param: Dict with keys 'stride' and 'pad' (zero padding applied
            to both spatial dimensions).

    Returns:
        Tuple (out, cache): out has shape (N, F, H_new, W_new) with
        H_new = 1 + (H + 2 * pad - HH) // stride (likewise for W_new), and
        cache = (x, w, b, conv_param).
    """
    stride, pad = conv_param['stride'], conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape

    # Zero-pad only the two spatial axes.
    x_padded = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')

    # Floor division keeps the output sizes integral on Python 3 too.
    H_new = 1 + (H + 2 * pad - HH) // stride
    W_new = 1 + (W + 2 * pad - WW) // stride

    out = np.zeros((N, F, H_new, W_new))
    for i in range(N):                  # ith image
        for f in range(F):              # fth filter
            for j in range(H_new):
                top = j * stride
                for k in range(W_new):
                    left = k * stride
                    window = x_padded[i, :, top:top + HH, left:left + WW]
                    # Element-wise product of the window with the filter.
                    out[i, f, j, k] = np.sum(window * w[f]) + b[f]

    cache = (x, w, b, conv_param)
    return out, cache


def conv_backward_naive(dout, cache):
    """Naive (loop-based) backward pass of a convolutional layer.

    Args:
        dout: Upstream gradient of shape (N, F, H_new, W_new).
        cache: Tuple (x, w, b, conv_param) from conv_forward_naive.

    Returns:
        Tuple (dx, dw, db) with the shapes of x, w and b respectively.
    """
    x, w, b, conv_param = cache
    pad = conv_param['pad']
    stride = conv_param['stride']
    F, _, HH, WW = w.shape
    N, C, H, W = x.shape
    H_new = 1 + (H + 2 * pad - HH) // stride
    W_new = 1 + (W + 2 * pad - WW) // stride

    dw = np.zeros_like(w)
    db = np.zeros_like(b)

    x_padded = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), 'constant')
    dx_padded = np.zeros_like(x_padded)

    for i in range(N):                  # ith image
        for f in range(F):              # fth filter
            for j in range(H_new):
                top = j * stride
                for k in range(W_new):
                    left = k * stride
                    grad = dout[i, f, j, k]
                    window = x_padded[i, :, top:top + HH, left:left + WW]
                    db[f] += grad
                    dw[f] += window * grad
                    # Windows overlap, so contributions must accumulate.
                    dx_padded[i, :, top:top + HH, left:left + WW] += w[f] * grad

    # Strip the padding to recover the gradient w.r.t. the original input.
    dx = dx_padded[:, :, pad:pad + H, pad:pad + W]

    return dx, dw, db

池化层

def max_pool_forward_naive(x, pool_param):
    """Naive (loop-based) forward pass of a max-pooling layer.

    Args:
        x: Input of shape (N, C, H, W).
        pool_param: Dict with keys 'pool_height', 'pool_width', 'stride'.

    Returns:
        Tuple (out, cache): out has shape (N, C, H_new, W_new) with
        H_new = 1 + (H - pool_height) // stride (likewise for W_new), and
        cache = (x, pool_param).
    """
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    s = pool_param['stride']
    N, C, H, W = x.shape

    # Floor division keeps the output sizes integral on Python 3 too.
    H_new = 1 + (H - HH) // s
    W_new = 1 + (W - WW) // s

    out = np.zeros((N, C, H_new, W_new))
    for i in range(N):
        for j in range(C):
            for k in range(H_new):
                for l in range(W_new):
                    window = x[i, j, k*s:HH + k*s, l*s:WW + l*s]
                    out[i, j, k, l] = np.max(window)

    cache = (x, pool_param)
    return out, cache


def max_pool_backward_naive(dout, cache):
    """Naive (loop-based) backward pass of a max-pooling layer.

    The upstream gradient of each output is routed to every position in its
    pooling window that equals the window maximum.

    Args:
        dout: Upstream gradient of shape (N, C, H_new, W_new).
        cache: Tuple (x, pool_param) from max_pool_forward_naive.

    Returns:
        dx: Gradient with the same shape as x.
    """
    x, pool_param = cache
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    s = pool_param['stride']
    N, C, H, W = x.shape
    H_new = 1 + (H - HH) // s
    W_new = 1 + (W - WW) // s

    dx = np.zeros_like(x)
    for i in range(N):
        for j in range(C):
            for k in range(H_new):
                for l in range(W_new):
                    window = x[i, j, k*s:HH + k*s, l*s:WW + l*s]
                    # Recompute the forward max; (window == m) then marks
                    # the position(s) the gradient has to flow to.
                    m = np.max(window)
                    # Accumulate rather than assign: with stride smaller
                    # than the pool size the windows overlap, and a plain
                    # assignment would overwrite earlier contributions.
                    dx[i, j, k*s:HH + k*s, l*s:WW + l*s] += (window == m) * dout[i, j, k, l]

    return dx

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/diannao/42333.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

学懂C#编程:WPF应用开发系列——WPF之ComboBox控件的详细用法

WPF（Windows Presentation Foundation）中的ComboBox控件是一个下拉列表控件，允许用户从一组预定义的选项中选择一个选项。以下是ComboBox控件的详细用法，并附带示例说明。 ComboBox的基本用法 1. XAML定义： 在XAML中…

multisim中关于74ls192n和DSWPK开关仿真图分析(减法计数器)

&#x1f3c6;本文收录于「Bug调优」专栏&#xff0c;主要记录项目实战过程中的Bug之前因后果及提供真实有效的解决方案&#xff0c;希望能够助你一臂之力&#xff0c;帮你早日登顶实现财富自由&#x1f680;&#xff1b;同时&#xff0c;欢迎大家关注&&收藏&&…

直播预告 | VMware大规模迁移实战,HyperMotion助力业务高效迁移

2006年核高基专项启动&#xff0c;2022年国家79号文件要求2027年央国企100%完成信创改造……国家一系列信创改造政策的推动&#xff0c;让服务器虚拟化软件巨头VMware在中国的市场份额迅速缩水。 加之VMware永久授权的取消和部分软件组件销售策略的变更&#xff0c;导致VMware…

开发一个HTTP模块

开发一个HTTP模块 HTTP模块的数据结构ngx_module_t模块的数据结构ngx_http_module_t数据结构ngx_command_s 数据结构 定义一个HTTP模块处理用户请求返回值获取URI和参数方法名URIURL协议版本 获取HTTP头获取HTTP包体 发送响应发送HTTP头发送内存中的字符串作为包体返回一个Hell…

什么时候考虑将mysql数据迁移到ES?

文章目录 对ES的一些疑问问题1:ES相比mysql本身有哪些优势&#xff1f;问题2:哪些场景适合用ES而不是mysql&#xff1f;问题3:mysql逐行扫描&#xff0c;根据过滤条件检查记录中对应字段是否满足要求属于正排索引&#xff0c;根据二叉树索引检索记录的方式属于正排索引还是倒排…

SpringBoot整合DataX数据同步(自动生成job文件)

SpringBoot整合Datax数据同步 文章目录 SpringBoot整合Datax数据同步1.简介设计理念 DataX3.0框架设计DataX3.0核心架构核心模块介绍DataX调度流程 2.DataX3.0插件体系3.数据同步1.编写job的json文件2.进入bin目录下&#xff0c;执行文件 4.SpringBoot整合DataX生成Job文件并执…

生产力工具|VS Code安装及使用指南

一、VS Code介绍 &#xff08;一&#xff09;软件介绍 Visual Studio Code&#xff08;简称VS Code&#xff09;是由Microsoft开发的免费开源代码编辑器&#xff0c;适用于Windows、macOS和Linux操作系统。它支持多种编程语言&#xff0c;如JavaScript、Python、C等&#xff0…

知识社区在线提问小程序模板源码

蓝色的知识问答&#xff0c;问答交流&#xff0c;知识社区&#xff0c;在线提问手机app小程序网页模板。包含&#xff1a;社区主页、提问、我的、绑定手机&#xff0c;实名认证等。 知识社区在线提问小程序模板源码

ubuntu 检查硬盘的通电时长、健康度

ubuntu 检查硬盘的通电时长、健康度 在Ubuntu系统中&#xff0c;检查硬盘的通电时长和健康度通常需要使用SMART&#xff08;Self-Monitoring, Analysis, and Reporting Technology&#xff09;工具。SMART是硬盘制造商内置的一套硬盘保护技术&#xff0c;用于监控硬盘的健康状况…

品质至上!中国星坤连接器的发展之道!

在电子连接技术领域&#xff0c;中国星坤以其卓越的创新能力和对品质的不懈追求&#xff0c;赢得了业界的广泛认可。凭借在高精度连接器设计和制造上的领先地位&#xff0c;星坤不仅获得了多项实用新型专利&#xff0c;更通过一系列国际质量管理体系认证&#xff0c;彰显了其产…

【Qt5.12.9】程序无法显示照片问题(已解决)

问题记录&#xff1a;Qt5.12.9下无法显示照片 我的工程名为03_qpainter&#xff0c;照片cd.png存放在工程目录下的image文件夹中。 /03_qpainter/image/cd.png 因为这是正点原子Linux下Qt书籍中的例程&#xff0c;在通过学习其配套的例程中的项目&#xff0c;发现我的项目少…

【Python】搭建属于自己 AI 机器人

目录 前言 1 准备工作 1.1 环境搭建 1.2 获取 API KEY 2 写代码 2.1 引用库 2.2 创建用户 2.3 创建对话 2.4 输出内容 2.5 调试 2.6 全部代码 2.7 简短的总结 3 优化代码 3.1 规范代码 3.1.1 引用库 3.1.2 创建提示词 3.1.3 创建模型 3.1.4 规范输出&#xf…

在线调试网络接口的免费网站

免费接口网站 GET接口 https://httpbin.org/get https://httpbin.org/ip https://publicobject.com/helloworld.txt POST接口 https://httpbin.org/post 调试网站 Postman需要下载安装&#xff0c;还要登录账号。对于简单测试&#xff0c;麻烦&#xff01; http://coolaf.…

西门子1200高速计数器编码器的应用 接线 组态 编程 调试 测距测速

编码器的应用、接线、组态、博途1200编程与调试&#xff1a;高速计数器&#xff0c;用于给PLC发高速脉冲&#xff0c;接I点 用来例如&#xff1a;检测电机转速&#xff0c;皮带输送机运行的距离 &#xff08;粗略定位&#xff09; 360&#xff1a;代表转一圈会对外发360个脉冲&…

系统化学习 H264视频编码(02) I帧 P帧 B帧 引入及相关概念解读

说明&#xff1a;我们参考黄金圈学习法&#xff08;什么是黄金圈法则?->模型 黄金圈法则&#xff0c;本文使用&#xff1a;why-what&#xff09;来学习音H264视频编码。本系列文章侧重于理解视频编码的知识体系和实践方法&#xff0c;理论方面会更多地讲清楚 音视频中概念的…

Python类实例的json

web开发中有这么一个场景&#xff0c;我们从数据库中查询某一数据的时候&#xff0c;往往需要对数据进行一些转化之后才能传给前端。 当然我们可以根据查询出来的实例对象&#xff0c;构建一个dict返回&#xff0c;这样会导致我们的代码非常的臃肿。但是这也确实是一种最直接的…

网络空间测绘是什么?

网络空间测绘是一种技术过程&#xff0c;用于探测、分析和可视化互联网及其他网络环境中的各种资源和连接。这个概念在2016年开始广泛使用&#xff0c;它涉及到收集有关网络节点&#xff08;如服务器、路由器、个人电脑和其他设备&#xff09;的信息&#xff0c;并建立这些节点…

C++ STL 多线程库用法介绍

目录 一:Atomic: 二:Thread 1. 创建线程 2. 小心移动(std::move)线程 3. 如何创建带参数的线程 4. 线程参数是引用类型时,要小心谨慎。 5. 获取线程ID 6. jthread 7. 如何在线程中使用中断 stop_token 三:如何解决数据竞争 1.有问题的代码 2.使用互斥 3.预防…

Vue3+.NET6前后端分离式管理后台实战(二十八)

1&#xff0c;Vue3.NET6前后端分离式管理后台实战(二十八)

【Linux进阶】文件系统6——理解文件操作

目录 1.文件的读取 1.1.目录 1.2.文件 1.3.目录树读取 1.4.文件系统大小与磁盘读取性能 2.增添文件 2.1.数据的不一致&#xff08;Inconsistent&#xff09;状态 2.2.日志式文件系统&#xff08;Journaling filesystem&#xff09; 3.Linux文件系统的运行 4、文件的删…