cuda 官网文档名:CUDA_Runtime_API
运行时API查询GPU信息
调用
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, device_id);
定义
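官方文档中的声明为 `__host__ cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int device)`,带有 `__host__` 限定符。由此可见,该函数只能在主机端调用。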
#include <cuda_runtime.h>

#include <stdio.h>

#include <cstdlib>
#include <iostream>

// Reports a failed CUDA runtime call and terminates the process.
static void CheckCudaErrorAux(const char*, unsigned, const char*, cudaError_t);

// Wraps a CUDA runtime call so that its source location and the stringified
// call expression are passed to CheckCudaErrorAux together with the result.
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__, __LINE__, #value, value)

// Prints a summary of the properties of the currently selected CUDA device
// to stdout. Returns 0 on success; any failing CUDA call terminates the
// process with exit code 1 via CUDA_CHECK_RETURN.
int main()
{
    int iDeviceId = 0;
    CUDA_CHECK_RETURN(cudaGetDevice(&iDeviceId));

    cudaDeviceProp prop;
    // Check the result: on failure `prop` is left unfilled and must not be printed.
    CUDA_CHECK_RETURN(cudaGetDeviceProperties(&prop, iDeviceId));

    std::cout << "device_id : " << iDeviceId << std::endl;
    std::cout << "device_name : " << prop.name << std::endl;
    std::cout << "compute capability : " << prop.major << "." << prop.minor << std::endl;

    // Byte counts are converted to GB / KB using floating-point division.
    std::cout << "amount of global memory : "
              << prop.totalGlobalMem / (1024.0 * 1024 * 1024) << "GB" << std::endl;
    std::cout << "amount of constant memory : "
              << prop.totalConstMem / 1024.0 << "KB" << std::endl;

    std::cout << "maximum grid size : " << prop.maxGridSize[0] << ","
              << prop.maxGridSize[1] << "," << prop.maxGridSize[2] << std::endl;
    std::cout << "maximum block size : " << prop.maxThreadsDim[0] << ","
              << prop.maxThreadsDim[1] << "," << prop.maxThreadsDim[2] << std::endl;
    std::cout << "number of SMs : " << prop.multiProcessorCount << std::endl;

    std::cout << "maximum amount of shared memory per block : "
              << prop.sharedMemPerBlock / 1024.0 << "KB" << std::endl;
    std::cout << "maximum amount of shared memory per SM : "
              << prop.sharedMemPerMultiprocessor / 1024.0 << "KB" << std::endl;

    // Register counts are shown in units of 1024 registers ("K").
    std::cout << "maximum number of registers per block : "
              << prop.regsPerBlock / 1024.0 << "K" << std::endl;
    std::cout << "maximum number of registers per SM : "
              << prop.regsPerMultiprocessor / 1024.0 << "K" << std::endl;

    std::cout << "maximum number of thread per block : "
              << prop.maxThreadsPerBlock << std::endl;
    std::cout << "maximum number of thread per SM : "
              << prop.maxThreadsPerMultiProcessor << std::endl;
    return 0;
}

// Returns immediately when `err` is cudaSuccess. Otherwise writes the failing
// statement, the CUDA error name, description and numeric code, and the
// file:line location to stderr, then exits the process with status 1.
static void CheckCudaErrorAux(const char* file, unsigned line, const char* statement, cudaError_t err)
{
    if (err == cudaSuccess)
        return;
    std::cerr << statement << " returned: " << cudaGetErrorName(err) << " \t : "
              << cudaGetErrorString(err) << "(" << err << ") at "
              << file << ":" << line << std::endl;
    std::exit(1);
}
结果
查询GPU计算核心数量
#include "cuda_runtime_api.h"

#include <stdio.h>

#include <cstdlib>
#include <iostream>

// Reports a failed CUDA runtime call and terminates the process.
static void CheckCudaErrorAux(const char*, unsigned, const char*, cudaError_t);

// Wraps a CUDA runtime call so that its source location and the stringified
// call expression are passed to CheckCudaErrorAux together with the result.
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__, __LINE__, #value, value)

// Estimates the total number of CUDA cores of a device as
// (number of SMs) x (cores per SM for the device's compute capability).
//
// The per-SM figures are a hard-coded table keyed on devProp.major/minor;
// NOTE(review): values follow NVIDIA's published per-architecture core counts
// (see the CUDA samples' SM-version-to-cores table) and need extending when
// new architectures appear.
//
// Returns 0 and prints "Unknown device type" to stdout when the compute
// capability is not in the table.
int getSPcores(cudaDeviceProp devProp)
{
    int cores = 0;
    const int mp = devProp.multiProcessorCount;

    switch (devProp.major) {
    case 2: // Fermi
        if (devProp.minor == 1)
            cores = mp * 48;
        else
            cores = mp * 32;
        break;
    case 3: // Kepler
        cores = mp * 192;
        break;
    case 5: // Maxwell
        cores = mp * 128;
        break;
    case 6: // Pascal
        if ((devProp.minor == 1) || (devProp.minor == 2))
            cores = mp * 128;
        else if (devProp.minor == 0)
            cores = mp * 64;
        else
            printf("Unknown device type\n");
        break;
    case 7: // Volta (7.0), Xavier (7.2) and Turing (7.5)
        if ((devProp.minor == 0) || (devProp.minor == 2) || (devProp.minor == 5))
            cores = mp * 64;
        else
            printf("Unknown device type\n");
        break;
    case 8: // Ampere (8.0, 8.6, 8.7) and Ada Lovelace (8.9)
        if (devProp.minor == 0)
            cores = mp * 64;
        else if ((devProp.minor == 6) || (devProp.minor == 7) || (devProp.minor == 9))
            cores = mp * 128;
        else
            printf("Unknown device type\n");
        break;
    case 9: // Hopper
        if (devProp.minor == 0)
            cores = mp * 128;
        else
            printf("Unknown device type\n");
        break;
    default:
        printf("Unknown device type\n");
        break;
    }
    return cores;
}

// Queries the properties of the currently selected CUDA device and prints its
// estimated CUDA core count. Any failing CUDA call terminates the process
// with exit code 1 via CUDA_CHECK_RETURN.
int main()
{
    int iDeviceId = 0;
    CUDA_CHECK_RETURN(cudaGetDevice(&iDeviceId));

    cudaDeviceProp prop;
    CUDA_CHECK_RETURN(cudaGetDeviceProperties(&prop, iDeviceId));

    std::cout << "Compute cores is " << getSPcores(prop) << std::endl;
    return 0;
}

// Returns immediately when `err` is cudaSuccess. Otherwise writes the failing
// statement, the CUDA error name, description and numeric code, and the
// file:line location to stderr, then exits the process with status 1.
static void CheckCudaErrorAux(const char* file, unsigned line, const char* statement, cudaError_t err)
{
    if (err == cudaSuccess)
        return;
    std::cerr << statement << " returned: " << cudaGetErrorName(err) << " \t : "
              << cudaGetErrorString(err) << "(" << err << ") at "
              << file << ":" << line << std::endl;
    std::exit(1);
}