13019. Accumulated CUDA Issues

Table of Contents

  • 1 The problem of continuously growing memory
    • 1.1 Copying memory from the host to the GPU
      • 1.1.1 htop: memory grows for a while, then stays stable
    • 1.2 After adding a device-to-host copy, memory stays stable
    • 1.3 Conclusion
  • 2 Host-GPU data copy schemes
    • 2.1 Copying with cudaMemcpy
    • 2.2 Asynchronous copy with cudaMemcpyAsync
    • 2.3 Multi-threaded copy techniques
      • 2.3.1 Multi-threaded DDR copy
      • 2.3.2 Multi-threaded CPU-to-CUDA-memory copy
      • 2.3.3 Conclusion

1 The problem of continuously growing memory

Background: a device buffer is created once with cudaMalloc, and a while loop repeatedly copies data into it with cudaMemcpy. Host memory usage kept growing until the system eventually crashed. To verify the idea and locate the problem, I wrote the following demo.

1.1 Copying memory from the host to the GPU

#include <cuda_runtime.h>
#include <chrono>
#include <cstdlib>
#include <thread>

#define BUFFER_SIZE (2 * 1024 * 1024) // 2MB buffer size

int main() {
    // Host memory allocation
    float* h_data = (float*)malloc(BUFFER_SIZE);

    // Initialize the host buffer with some data
    for (size_t i = 0; i < BUFFER_SIZE / sizeof(float); ++i) {
        h_data[i] = static_cast<float>(i);
    }

    // Device memory allocation
    float* d_data;
    cudaMalloc((void**)&d_data, BUFFER_SIZE);

    // Main loop
    while (true) {
        // Copy data from host to device
        cudaMemcpy(d_data, h_data, BUFFER_SIZE, cudaMemcpyHostToDevice);

        // Sleep to avoid consuming all CPU resources; adjust the duration as needed
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    // Cleanup (unreachable, kept for completeness)
    cudaFree(d_data);
    free(h_data);
    return 0;
}

1.1.1 htop: memory grows for a while, then stays stable

  • Watching memory with htop, usage keeps growing at first.
    [htop screenshot: resident memory rising]
  • After the program has run for a while, htop shows that memory has stabilized (an in-process way to watch the same thing is sketched after this list).
    [htop screenshot: resident memory flat]
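To confirm the same behavior from inside the program instead of eyeballing htop, a small helper can print the process's resident set size. This is generic Linux code, not part of the original demo; it can be called every few hundred loop iterations.

#include <cstdio>
#include <cstring>

// Print this process's VmRSS from /proc/self/status (Linux only).
// VmRSS corresponds to the RES column that htop shows for the process.
static void print_rss(const char *tag) {
    FILE *f = fopen("/proc/self/status", "r");
    if (!f) return;
    char line[256];
    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, "VmRSS:", 6) == 0) {
            printf("[%s] %s", tag, line);  // the line already ends with '\n'
            break;
        }
    }
    fclose(f);
}

The expected pattern matches the screenshots: RSS rises during the first iterations while the CUDA runtime lazily sets up its context and internal staging buffers, then flattens, which is consistent with there being no leak in the user code.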

1.2 After adding a device-to-host copy, memory stays stable

  while (true) {
      // Copy data from host to device
      cudaMemcpy(d_data, h_data, BUFFER_SIZE, cudaMemcpyHostToDevice);
      // Sleep to avoid consuming all CPU resources; adjust the duration as needed
      std::this_thread::sleep_for(std::chrono::milliseconds(100));
      // Copy data back from device to host
      cudaMemcpy(h_data, d_data, BUFFER_SIZE, cudaMemcpyDeviceToHost);
      std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }

[htop screenshot: resident memory stays flat after the device-to-host copy is added]

1.3 Conclusion

Analysis shows that calling cudaMemcpy in a while loop does not, by itself, make host memory keep growing; using shared memory inside the kernel has no effect on this either. In my project the memory kept growing because the pcap thread that captures raw data enqueued frames faster than the pixel thread's CUDA processing could consume them, so the queue grew without bound. The immediate fix was to reduce the rate of the raw data; a bounded queue (sketched below) is another option, and Section 2 looks at optimizing cudaMemcpy itself.
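Since the real cause was an unbounded producer/consumer queue between the pcap thread and the pixel thread, an alternative to simply lowering the input rate is to bound the queue so the producer has to wait. The sketch below is illustrative only; the class name and interface are hypothetical and not taken from the project code.

#include <condition_variable>
#include <cstddef>
#include <deque>
#include <mutex>

// Minimal bounded queue: push() blocks once max_size items are pending,
// so host memory can no longer grow without bound when the consumer is slow.
template <typename T>
class BoundedQueue {
public:
    explicit BoundedQueue(std::size_t max_size) : max_size_(max_size) {}

    void push(T item) {
        std::unique_lock<std::mutex> lk(m_);
        not_full_.wait(lk, [&] { return q_.size() < max_size_; });
        q_.push_back(std::move(item));
        not_empty_.notify_one();
    }

    T pop() {
        std::unique_lock<std::mutex> lk(m_);
        not_empty_.wait(lk, [&] { return !q_.empty(); });
        T item = std::move(q_.front());
        q_.pop_front();
        not_full_.notify_one();
        return item;
    }

private:
    std::mutex m_;
    std::condition_variable not_full_, not_empty_;
    std::deque<T> q_;
    std::size_t max_size_;
};

Blocking the producer applies back-pressure automatically, which achieves the same effect as the manual rate reduction but adapts to however fast the CUDA side actually is.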

2 Host-GPU data copy schemes

  • The computation done inside the kernel is irrelevant here; the purpose of these demos is to optimize the time spent on the copies themselves.

2.1 Copying with cudaMemcpy

#include <cuda_runtime.h>
#include <iostream>
#include <thread>
#include <chrono>
#include <cstdint>

#define BUFFER_SIZE (4 * 1024 * 1024) // 4MB buffer size

typedef struct DistCompenParam {
    // Normalization parameters.
    float a;
    float b;
    float c;
} DistCompenParam_T;

typedef struct PixelPointLight {
    struct {
        uint16_t peak;            // uint12_0
        uint8_t  gray;            // gray value computed after gray compensation
        uint8_t  ech_en;          // byte alignment
        float    fwhm_f;
        float    half_start_pos_f;
        float    dR;              // distance compensation value
        // intermediate variables used during computation
        float    x_peak;
        float    x_fwhm;
    } echo[2];
} PixelPointLight_T;

typedef struct PixelSlotLight {
    PixelPointLight_T point[192];
} PixelSlotLight_T;

// CUDA kernel function to add a value to each element of the array
__global__ void addValueKernel(PixelSlotLight_T* data, DistCompenParam_T *para, int size) {
    int idx = blockIdx.x; // * blockDim.x + threadIdx.x;
    int echoIdx = threadIdx.x;
    int Idy = threadIdx.y;
    __shared__ float y[6];
    if (idx < size) {
        data->point[idx].echo[echoIdx].gray = para->a + 10;
        data->point[idx].echo[echoIdx].fwhm_f = para->a + 20;
        y[Idy] = tanh(data->point[idx].echo[echoIdx].gray + data->point[idx].echo[echoIdx].fwhm_f);
        __syncthreads();
        data->point[idx].echo[echoIdx].peak = y[0] + y[1] + y[2] + y[3] + y[4] + y[5];
    }
}

extern "C" void process_algo_gpu(PixelSlotLight_T *pixel_devptr, DistCompenParam_T *para_devptr, int numPoints)
{
    // Call the CUDA kernel to add a value to each element
    dim3 blocksPerGrid(192, 1);
    dim3 threadsPerBlock(2, 6);
    addValueKernel<<<blocksPerGrid, threadsPerBlock>>>(pixel_devptr, para_devptr, 192);
}

int main() {
    PixelSlotLight_T host_data1;
    int total_time[1024] = {};
    int count = 0;

    // Host memory allocation
    PixelSlotLight_T* h_data = (PixelSlotLight_T *)malloc(sizeof(PixelSlotLight_T));
    DistCompenParam_T* h_para = (DistCompenParam_T *)malloc(sizeof(DistCompenParam_T));
    h_para->a = 3;
    h_para->b = 4;
    h_para->c = 5;

    // Initialize the host buffer with some data
    for (int i = 0; i < 192; ++i) {
        for (int j = 0; j < 2; j++) {
            h_data->point[i].echo[j].peak = 200;
            h_data->point[i].echo[j].gray = 100;
            h_data->point[i].echo[j].fwhm_f = 15;
        }
    }

    // Device memory allocation
    PixelSlotLight_T *pixel_devptr;
    cudaMalloc((void**)&pixel_devptr, sizeof(PixelSlotLight_T));
    DistCompenParam_T *para_devptr;
    cudaMalloc((void**)&para_devptr, sizeof(DistCompenParam_T));
    cudaMemcpy(para_devptr, h_para, sizeof(DistCompenParam_T), cudaMemcpyHostToDevice);

    // Main loop
    while (true) {
        auto start = std::chrono::high_resolution_clock::now();

        // Copy data from host to device
        cudaMemcpy(pixel_devptr, h_data, sizeof(PixelSlotLight_T), cudaMemcpyHostToDevice);
        process_algo_gpu(pixel_devptr, para_devptr, 192);
        // Ensure that the kernel execution has completed before moving on
        cudaDeviceSynchronize();
        cudaMemcpy(&host_data1, pixel_devptr, sizeof(PixelSlotLight_T), cudaMemcpyDeviceToHost);

        auto end = std::chrono::high_resolution_clock::now();
        total_time[count] = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
        count++;
        if (count == 1024) {
            count = 0;
            int sum = 0;
            for (int i = 0; i < 1024; i++) {
                sum += total_time[i];
            }
            std::cout << "time:" << sum/1024 << " us." << std::endl;
        }
    }

    // Cleanup (unreachable, kept for completeness)
    cudaFree(pixel_devptr);
    cudaFree(para_devptr);
    free(h_data);
    free(h_para);
    return 0;
}
  • Execution time; each line below is the mean over 1024 iterations (a CUDA-event-based timing cross-check is sketched after this list):
time:204 us.
time:197 us.
time:222 us.
time:209 us.
time:198 us.
time:196 us.
time:209 us.
time:194 us.
time:189 us.
time:231 us.
time:215 us.
time:264 us.
time:242 us.
time:199 us.
time:235 us.
time:422 us.
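The averages above come from std::chrono around the whole loop body, so they also include host-side scheduling jitter. As a cross-check, the same span can be timed with CUDA events; this is a standard CUDA pattern rather than code from the original demo, and it reuses the pointer names defined above.

// Time copy + kernel + copy with CUDA events (resolution is roughly 0.5 us).
cudaEvent_t ev_start, ev_stop;
cudaEventCreate(&ev_start);
cudaEventCreate(&ev_stop);

cudaEventRecord(ev_start);
cudaMemcpy(pixel_devptr, h_data, sizeof(PixelSlotLight_T), cudaMemcpyHostToDevice);
process_algo_gpu(pixel_devptr, para_devptr, 192);
cudaMemcpy(&host_data1, pixel_devptr, sizeof(PixelSlotLight_T), cudaMemcpyDeviceToHost);
cudaEventRecord(ev_stop);
cudaEventSynchronize(ev_stop);

float ms = 0.0f;
cudaEventElapsedTime(&ms, ev_start, ev_stop);
std::cout << "gpu time: " << ms * 1000.0f << " us." << std::endl;

cudaEventDestroy(ev_start);
cudaEventDestroy(ev_stop);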

2.2 Asynchronous copy with cudaMemcpyAsync

#include <cuda_runtime.h>
#include <iostream>
#include <thread>
#include <chrono>
#include <cstdint>

#define BUFFER_SIZE (4 * 1024 * 1024) // 4MB buffer size

typedef struct DistCompenParam {
    // Normalization parameters.
    float a;
    float b;
    float c;
} DistCompenParam_T;

typedef struct PixelPointLight {
    struct {
        uint16_t peak;            // uint12_0
        uint8_t  gray;            // gray value computed after gray compensation
        uint8_t  ech_en;          // byte alignment
        float    fwhm_f;
        float    half_start_pos_f;
        float    dR;              // distance compensation value
        // intermediate variables used during computation
        float    x_peak;
        float    x_fwhm;
    } echo[2];
} PixelPointLight_T;

typedef struct PixelSlotLight {
    PixelPointLight_T point[192];
} PixelSlotLight_T;

// CUDA kernel function to add a value to each element of the array
__global__ void addValueKernel(PixelSlotLight_T* data, DistCompenParam_T *para, int size) {
    int idx = blockIdx.x; // * blockDim.x + threadIdx.x;
    int echoIdx = threadIdx.x;
    int Idy = threadIdx.y;
    __shared__ float y[6];
    if (idx < size) {
        data->point[idx].echo[echoIdx].gray = para->a + 10;
        data->point[idx].echo[echoIdx].fwhm_f = para->a + 20;
        y[Idy] = tanh(data->point[idx].echo[echoIdx].gray + data->point[idx].echo[echoIdx].fwhm_f);
        __syncthreads();
        data->point[idx].echo[echoIdx].peak = y[0] + y[1] + y[2] + y[3] + y[4] + y[5];
    }
}

extern "C" void process_algo_gpu(PixelSlotLight_T *pixel_devptr, DistCompenParam_T *para_devptr, int numPoints)
{
    // Call the CUDA kernel to add a value to each element
    dim3 blocksPerGrid(192, 1);
    dim3 threadsPerBlock(2, 6);
    addValueKernel<<<blocksPerGrid, threadsPerBlock>>>(pixel_devptr, para_devptr, 192);
}

int main() {
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    PixelSlotLight_T host_data1;
    int total_time[1024] = {};
    int count = 0;

    // Host memory allocation
    PixelSlotLight_T* h_data = (PixelSlotLight_T *)malloc(sizeof(PixelSlotLight_T));
    DistCompenParam_T* h_para = (DistCompenParam_T *)malloc(sizeof(DistCompenParam_T));
    h_para->a = 3;
    h_para->b = 4;
    h_para->c = 5;

    // Initialize the host buffer with some data
    for (int i = 0; i < 192; ++i) {
        for (int j = 0; j < 2; j++) {
            h_data->point[i].echo[j].peak = 200;
            h_data->point[i].echo[j].gray = 100;
            h_data->point[i].echo[j].fwhm_f = 15;
        }
    }

    // Device memory allocation
    PixelSlotLight_T *pixel_devptr;
    cudaMalloc((void**)&pixel_devptr, sizeof(PixelSlotLight_T));
    DistCompenParam_T *para_devptr;
    cudaMalloc((void**)&para_devptr, sizeof(DistCompenParam_T));
    cudaMemcpy(para_devptr, h_para, sizeof(DistCompenParam_T), cudaMemcpyHostToDevice);

    // Main loop
    while (true) {
        auto start = std::chrono::high_resolution_clock::now();

        // Copy data from host to device asynchronously on the stream
        cudaMemcpyAsync(pixel_devptr, h_data, sizeof(PixelSlotLight_T), cudaMemcpyHostToDevice, stream);
        // process_algo_gpu(pixel_devptr, para_devptr, 192);
        cudaMemcpyAsync(&host_data1, pixel_devptr, sizeof(PixelSlotLight_T), cudaMemcpyDeviceToHost, stream);
        // Wait for both copies to finish before stopping the timer
        cudaStreamSynchronize(stream);

        auto end = std::chrono::high_resolution_clock::now();
        total_time[count] = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
        count++;
        if (count == 1024) {
            count = 0;
            int sum = 0;
            for (int i = 0; i < 1024; i++) {
                sum += total_time[i];
            }
            std::cout << "time:" << sum/1024 << " us." << std::endl;
        }
    }

    // Cleanup (unreachable, kept for completeness)
    cudaFree(pixel_devptr);
    cudaFree(para_devptr);
    cudaStreamDestroy(stream);
    free(h_data);
    free(h_para);
    return 0;
}
  • Subjectively there is no order-of-magnitude improvement; one likely reason, pageable host memory, is discussed in the sketch after this list.
time:203 us.
time:180 us.
time:194 us.
time:179 us.
time:170 us.
time:179 us.
time:184 us.
time:195 us.
time:175 us.
time:176 us.
time:204 us.
time:205 us.
time:176 us.
time:173 us.
time:171 us.
time:198 us.
time:183 us.
time:173 us.
time:184 us.
time:177 us.
time:174 us.
time:174 us.
time:250 us.
time:230 us.
time:272 us.
time:192 us.
time:203 us.
time:197 us.
time:189 us.
time:224 us.
time:223 us.
time:227 us.
time:230 us.
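One likely reason the asynchronous version is barely faster is that h_data and host_data1 are pageable memory (malloc / the stack): the driver has to stage such transfers through an internal pinned buffer, so little real overlap is possible. Below is a sketch of the same loop using pinned (page-locked) host memory; this variant was not measured in the original test, so the figures above do not cover it.

// Allocate page-locked host buffers so cudaMemcpyAsync can DMA directly.
PixelSlotLight_T *h_data_pinned = nullptr;
PixelSlotLight_T *h_result_pinned = nullptr;
cudaMallocHost((void**)&h_data_pinned, sizeof(PixelSlotLight_T));
cudaMallocHost((void**)&h_result_pinned, sizeof(PixelSlotLight_T));
// ... initialize h_data_pinned exactly as h_data was initialized above ...

while (true) {
    cudaMemcpyAsync(pixel_devptr, h_data_pinned, sizeof(PixelSlotLight_T),
                    cudaMemcpyHostToDevice, stream);
    // if the kernel is re-enabled, launch it into the same stream to keep ordering
    cudaMemcpyAsync(h_result_pinned, pixel_devptr, sizeof(PixelSlotLight_T),
                    cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    // ... timing code unchanged ...
}

cudaFreeHost(h_data_pinned);
cudaFreeHost(h_result_pinned);

Pinned allocations are a limited resource, so it is best to create them once at startup (as here) rather than per frame; they are also the prerequisite for genuinely overlapping copies with kernel execution across multiple streams.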

2.3 Multi-threaded copy techniques

2.3.1 Multi-threaded DDR copy

#include <cuda_runtime.h>
#include <iostream>
#include <thread>
#include <chrono>
#include <queue>
#include <semaphore.h>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

typedef struct DistCompenParam {
    // Normalization parameters.
    float a;
    float b;
    float c;
} DistCompenParam_T;

typedef struct PixelPointLight {
    struct {
        uint16_t peak;            // uint12_0
        uint8_t  gray;            // gray value computed after gray compensation
        uint8_t  ech_en;          // byte alignment
        float    fwhm_f;
        float    half_start_pos_f;
        float    dR;              // distance compensation value
        // intermediate variables used during computation
        float    x_peak;
        float    x_fwhm;
    } echo[2];
} PixelPointLight_T;

typedef struct PixelSlotLight {
    PixelPointLight_T point[192];
    PixelPointLight_T point1[192][600];  // enlarges the struct to ~5.5MB
} PixelSlotLight_T;

// CUDA kernel function to add a value to each element of the array
__global__ void addValueKernel(PixelSlotLight_T* data, DistCompenParam_T *para, int size) {
    int idx = blockIdx.x; // * blockDim.x + threadIdx.x;
    int echoIdx = threadIdx.x;
    int Idy = threadIdx.y;
    __shared__ float y[6];
    if (idx < size) {
        data->point[idx].echo[echoIdx].gray = para->a + 10;
        data->point[idx].echo[echoIdx].fwhm_f = para->a + 20;
        y[Idy] = tanh(data->point[idx].echo[echoIdx].gray + data->point[idx].echo[echoIdx].fwhm_f);
        __syncthreads();
        data->point[idx].echo[echoIdx].peak = y[0] + y[1] + y[2] + y[3] + y[4] + y[5];
    }
}

extern "C" void process_algo_gpu(PixelSlotLight_T *pixel_devptr, DistCompenParam_T *para_devptr, int numPoints)
{
    // Call the CUDA kernel to add a value to each element
    dim3 blocksPerGrid(192, 1);
    dim3 threadsPerBlock(2, 6);
    addValueKernel<<<blocksPerGrid, threadsPerBlock>>>(pixel_devptr, para_devptr, 192);
}

// Device memory allocation
PixelSlotLight_T *pixel_devptr;

typedef struct {
    sem_t sem_p[4];
    sem_t sem_w[4];
    PixelSlotLight_T *host_ptr;
    PixelSlotLight_T *dev_ptr;
    uint8_t direct;  // copy direction: 0 = host-to-dev, 1 = dev-to-host
} Worker_T;

Worker_T worker_res;

/**
 * index: index of this worker's chunk
 * len:   size of each chunk in bytes
 */
void worker(int index, int len) {
    cudaError_t err;
    while (1) {
        sem_wait(&worker_res.sem_w[index]);  // wait for work
        if (worker_res.direct == 0) {        // host --> dev (here: plain DDR-to-DDR memcpy)
            // err = cudaMemcpy((uint8_t *)worker_res.dev_ptr + index*len, (uint8_t *)worker_res.host_ptr + index*len, len, cudaMemcpyHostToDevice);
            memcpy((uint8_t *)worker_res.dev_ptr + index*len, (uint8_t *)worker_res.host_ptr + index*len, len);
        } else if (worker_res.direct == 1) { // dev --> host
            err = cudaMemcpy((uint8_t *)worker_res.host_ptr + index*len, (uint8_t *)worker_res.dev_ptr + index*len, len, cudaMemcpyDeviceToHost);
            if (err != cudaSuccess) {
                std::cerr << "cudaMemcpy failed with error: " << cudaGetErrorString(err) << std::endl;
            }
        }
        sem_post(&worker_res.sem_p[index]);  // report completion
    }
}

inline void host_to_dev_memcpy(Worker_T *worker_res_)
{
    for (int i = 0; i < 4; i++) {
        worker_res_->direct = 0;
        sem_post(&worker_res_->sem_w[i]);
    }
    for (int i = 0; i < 4; i++) {
        sem_wait(&worker_res_->sem_p[i]);
    }
}

inline void dev_to_host_memcpy(Worker_T *worker_res_)
{
    for (int i = 0; i < 4; i++) {
        worker_res_->direct = 1;
        sem_post(&worker_res_->sem_w[i]);
    }
    for (int i = 0; i < 4; i++) {
        sem_wait(&worker_res_->sem_p[i]);
    }
}

int main() {
    // Initialize the semaphores before starting the workers
    for (int i = 0; i < 4; i++) {
        sem_init(&worker_res.sem_w[i], 0, 0);
        sem_init(&worker_res.sem_p[i], 0, 0);
    }

    std::vector<std::thread> threads;
    for (int i = 0; i < 4; ++i) {
        threads.emplace_back(worker, i, sizeof(PixelSlotLight_T) / 4);
    }

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    PixelSlotLight_T host_data1;
    int total_time[4096] = {};
    int count = 0;

    // Host memory allocation
    PixelSlotLight_T* h_data = (PixelSlotLight_T *)malloc(sizeof(PixelSlotLight_T));
    PixelSlotLight_T* h_data1 = (PixelSlotLight_T *)malloc(sizeof(PixelSlotLight_T));
    DistCompenParam_T* h_para = (DistCompenParam_T *)malloc(sizeof(DistCompenParam_T));
    h_para->a = 3;
    h_para->b = 4;
    h_para->c = 5;

    // Initialize the host buffer with some data
    for (int i = 0; i < 192; ++i) {
        for (int j = 0; j < 2; j++) {
            h_data->point[i].echo[j].peak = 200;
            h_data->point[i].echo[j].gray = 100;
            h_data->point[i].echo[j].fwhm_f = 15;
        }
    }

    cudaMalloc((void**)&pixel_devptr, sizeof(PixelSlotLight_T));
    DistCompenParam_T *para_devptr;
    cudaMalloc((void**)&para_devptr, sizeof(DistCompenParam_T));
    cudaMemcpy(para_devptr, h_para, sizeof(DistCompenParam_T), cudaMemcpyHostToDevice);

    printf("PixelSlotLight_T:%ld Byte.\n", sizeof(PixelSlotLight_T));

    // Main loop: split one ~5.5MB host-to-host copy across the 4 worker threads
    while (true) {
        auto start = std::chrono::high_resolution_clock::now();

        worker_res.host_ptr = &host_data1;
        worker_res.dev_ptr = h_data1;   // destination is another host buffer in this DDR test
        worker_res.direct = 0;
        host_to_dev_memcpy(&worker_res);
        // cudaMemcpy(&host_data1, pixel_devptr, sizeof(PixelSlotLight_T), cudaMemcpyDeviceToHost);

        auto end = std::chrono::high_resolution_clock::now();
        total_time[count] = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
        count++;
        if (count == 4096) {
            count = 0;
            int sum = 0;
            for (int i = 0; i < 4096; i++) {
                sum += total_time[i];
            }
            std::cout << "time:" << sum/4096 << " us." << std::endl;
        }
    }

    // Cleanup (unreachable, kept for completeness)
    cudaFree(pixel_devptr);
    cudaFree(para_devptr);
    cudaStreamDestroy(stream);
    free(h_data);
    free(h_data1);
    free(h_para);
    return 0;
}
./host7 
PixelSlotLight_T:5538816 Byte.
time:879 us.
time:870 us.
time:888 us.
time:932 us.
time:924 us.
time:943 us.
time:1056 us.
time:1501 us.
time:1051 us.
time:1205 us.
time:734 us.
time:504 us.
time:550 us.
time:545 us.
time:669 us.

2.3.2 Multi-threaded CPU-to-CUDA-memory copy

  • 4 CPU threads copying data to the GPU
PixelSlotLight_T:5538816 Byte.
time:2058 us.
time:2333 us.
time:2417 us.
time:2151 us.
time:1664 us.
time:1649 us.
  • Single CPU thread copying data to the GPU
PixelSlotLight_T:5538816 Byte.
time:1649 us.
time:1675 us.
time:1667 us.
time:1631 us.
time:1484 us.
time:1281 us.
time:1256 us.
time:1256 us.
time:1302 us.
time:1586 us.
time:1444 us.

2.3.3 Conclusion

  • Analysis: the CPU and GPU exchange data over PCIe, whose total bandwidth is fixed, so splitting one transfer across several threads only adds contention and semaphore overhead and brings no advantage.
  • Under the same conditions, the practical way to improve efficiency is to pick an appropriate amount of data per transfer. There is a critical size below which the per-copy time is dominated by fixed overhead and hardly changes, so batching data up to that point is essentially free (see the size sweep sketched below).
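To find that critical point on a given platform, a small sweep over transfer sizes is enough. The program below is a generic micro-benchmark, not taken from the project; the size range and iteration count are arbitrary.

#include <cuda_runtime.h>
#include <chrono>
#include <cstdio>

// Sweep host-to-device copy sizes: below the knee the time per copy is
// dominated by fixed overhead, above it the time scales with the byte count.
int main() {
    const size_t max_size = 64 * 1024 * 1024;
    void *h_buf = nullptr, *d_buf = nullptr;
    cudaMallocHost(&h_buf, max_size);   // pinned, so we measure the bus rather than paging
    cudaMalloc(&d_buf, max_size);

    for (size_t size = 4 * 1024; size <= max_size; size *= 2) {
        const int iters = 100;
        auto start = std::chrono::high_resolution_clock::now();
        for (int i = 0; i < iters; ++i) {
            cudaMemcpy(d_buf, h_buf, size, cudaMemcpyHostToDevice);
        }
        auto end = std::chrono::high_resolution_clock::now();
        double us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / (double)iters;
        printf("%8zu KB: %9.1f us, %7.2f GB/s\n", size / 1024, us, size / us / 1e3);
    }

    cudaFree(d_buf);
    cudaFreeHost(h_buf);
    return 0;
}

Reading the GB/s column shows where the curve flattens out; copies smaller than that point pay mostly fixed cost, which is why merging several small transfers into one larger cudaMemcpy is usually the first optimization to try.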
