目录
- OpenCL Platform
- OpenCL Device
- 参考代码
OpenCL Platform
opencl
支持的 Platform
可以使用 clGetPlatformIDs
函数查询,函数原型如下:
clGetPlatformIDs(cl_uint /* num_entries */,cl_platform_id * /* platforms */,cl_uint * /* num_platforms */);
如果将platforms
参数设置为nullptr
,num_platforms
会返回当前平台上可用的platform
数量
一般将 num_entries
和 platforms
设置为 0 和 nullptr
来查询可用的platform
数量
在已经得到当前支持的 platform 数量的前提下,设置 num_entries
和 platforms
就可以获取到所有的 platformID
,参考代码如下:
std::vector<cl_platform_id> clplatform;cl_uint num_platform;err = clGetPlatformIDs(0, nullptr, &num_platform);std::cout << "number of platforms: " << num_platform << std::endl;clplatform.resize(num_platform);err = clGetPlatformIDs(num_platform, clplatform.data(), NULL);
获取到 platformID
之后,可以使用 clGetPlatformInfo
获取平台的信息,其原型如下:
clGetPlatformInfo(cl_platform_id /* platform */, cl_platform_info /* param_name */,size_t /* param_value_size */, void * /* param_value */,size_t * /* param_value_size_ret */);
- cl_platform_id:前面获取到的 platformID
- cl_platform_info:设置需要获取的 platformInfo
- param_value_size:param_value 所指向缓冲区的大小(字节数)
- param_value:用于接收查询结果(字符串)的缓冲区指针
- param_value_size_ret:返回查询结果实际需要的大小(字节数)
一般先将param_value_size
和param_value
设置为 0 和nullptr
,返回param_value
的size
大小,
然后使用获取到的size
传递给param_value_size
,得到对应的param_value
对于不同的platformInfo
信息,含义如下:
platformInfo | 返回类型 | 说明 |
---|---|---|
CL_PLATFORM_PROFILE | char[] | FULL_PROFILE 或者 EMBEDDED_PROFILE |
CL_PLATFORM_VERSION | char[] | opencl 版本 |
CL_PLATFORM_NAME | char[] | 平台名称 |
CL_PLATFORM_VENDOR | char[] | 平台厂商 |
CL_PLATFORM_EXTENSIONS | char[] | 平台支持扩展名 |
FULL_PROFILE
:支持定位为核心规范的所有功能
EMBEDDED_PROFILE
: 支持定位为核心规范的所有功能的一个子集
OpenCL Device
每个平台可能关联到一组计算设备,应用程序通过这些计算设备执行内核程序,使用
clGetDeviceIDs
查询支持的设备列表
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDs(cl_platform_id /* platform */,cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
这个函数会得到与 platformID 关联的所有 OpenCL 设备列表,如果参数 devices 设置为 nullptr,clGetDeviceIDs
会通过 num_devices 返回设备数,返回的设备数可以用 num_entries
来限制(0 < num_entries <= 设备数)
其中的 cl_device_type 可以是下表中的一个值:
cl_device_type | 描述 |
---|---|
CL_DEVICE_TYPE_CPU | 作为宿主机处理器的 OpenCL 设备 |
CL_DEVICE_TYPE_GPU | 作为 GPU 的 OpenCL 设备 |
CL_DEVICE_TYPE_ACCELERATOR | OpenCL 加速器 |
CL_DEVICE_TYPE_ALL | 和平台关联的所有 OpenCL 设备 |
给定一个设备,可以使用下面函数查询各种属性:
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceInfo(cl_device_id /* device */,cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */,size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
可以将param_value_size
和param_value
设置为 0 和 nullptr 来查询返回值的大小
具体可以查询的 Device 属性如下图所示:
参考代码
获取 platform 和 device 属性的参考代码如下:
void getCLPlatformInfo() {cl_int err;//cl_platform_id* platform;std::vector<cl_platform_id> clplatform;cl_uint num_platform;std::vector<cl_device_id> cldevice;cl_uint num_device;err = clGetPlatformIDs(0, nullptr, &num_platform);std::cout << "number of platforms: " << num_platform << std::endl;clplatform.resize(num_platform);err = clGetPlatformIDs(num_platform, clplatform.data(), NULL);for (auto& platform : clplatform) {size_t size;err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);cl_char* PName = new cl_char[size];err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, PName, NULL);printf("CL_PLATFORM_NAME: %s\n", PName);err = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, 0, NULL, &size);cl_char* PVendor = new cl_char[size];err = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, size, PVendor, NULL);printf("CL_PLATFORM_VENDOR: %s\n", PVendor);err = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size);cl_char* PVersion = new cl_char[size];err = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, PVersion, NULL);printf("CL_PLATFORM_VERSION: %s\n", PVersion);err = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &size);cl_char* PProfile = new cl_char[size];err = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, size, PProfile, NULL);printf("CL_PLATFORM_PROFILE: %s\n", PProfile);err = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, 0, NULL, &size);cl_char* PExten = new cl_char[size];err = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, size, PExten, NULL);printf("CL_PLATFORM_EXTENSIONS: %s\n", PExten);delete[] PName;delete[] PVendor;delete[] PVersion;delete[] PProfile;delete[] PExten;}std::cout << "\n=======================================================\n" << std::endl;for (auto& platform : clplatform) {size_t size;err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &num_device);std::cout << "num of device:" << num_device << std::endl;cldevice.resize(num_device);err = clGetDeviceIDs(platform, 
CL_DEVICE_TYPE_ALL, num_device, cldevice.data(), nullptr);for (auto& device : cldevice) {// deviceNameerr = clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &size);cl_char* PDeviceName = new cl_char[size];err = clGetDeviceInfo(device, CL_DEVICE_NAME, size, PDeviceName, nullptr);std::cout << "PDeviceName:" << PDeviceName << std::endl;// device vendorerr = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, nullptr, &size);cl_char* PDeviceVendor = new cl_char[size];err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, size, PDeviceVendor, nullptr);std::cout << "PDeviceVendor:" << PDeviceVendor << std::endl;// driver versionerr = clGetDeviceInfo(device, CL_DRIVER_VERSION, 0, nullptr, &size);cl_char* PDriverVersion = new cl_char[size];err = clGetDeviceInfo(device, CL_DRIVER_VERSION, size, PDriverVersion, nullptr);std::cout << "PDriverVersion:" << PDriverVersion << std::endl;// device profileerr = clGetDeviceInfo(device, CL_DEVICE_PROFILE, 0, nullptr, &size);cl_char* PProfile = new cl_char[size];err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, size, PProfile, nullptr);std::cout << "PProfile:" << PProfile << std::endl;// device versionerr = clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, nullptr, &size);cl_char* PDeviceVersion = new cl_char[size];err = clGetDeviceInfo(device, CL_DEVICE_VERSION, size, PDeviceVersion, nullptr);std::cout << "PDeviceVersion:" << PDeviceVersion << std::endl;// device opencl c versionerr = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr, &size);cl_char* POpenCLCVersion = new cl_char[size];err = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, size, POpenCLCVersion, nullptr);std::cout << "POpenCLCVersion:" << POpenCLCVersion << std::endl;// device extensionerr = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, nullptr, &size);cl_char* PDeviceExtensions = new cl_char[size];err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, PDeviceExtensions, nullptr);std::cout << "PDeviceExtensions:" << PDeviceExtensions << std::endl;// 
最大计算单元数cl_uint UnitNum;err = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &UnitNum, nullptr);std::cout << "Max ComputeUnit Number:" << UnitNum << std::endl;// 最高核心频率cl_uint frequency;err = clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &frequency, nullptr);std::cout << "Max clock frequency:" << frequency << std::endl;// 查询设备全局内存大小cl_ulong GlobalSize;err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &GlobalSize, nullptr);std::cout << "GlobalSize Uniform Memory Size(B):" << GlobalSize << std::endl;// 查询设备全局内存缓存行cl_uint GlobalCacheLine;err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint), &GlobalCacheLine, nullptr);std::cout << "Device Global CacheLine(B):" << GlobalCacheLine << std::endl;delete[] PDeviceName;delete[] PDeviceVendor;delete[] PDriverVersion;delete[] PProfile;delete[] PDeviceVersion;delete[] POpenCLCVersion;delete[] PDeviceExtensions;}}}
在我搭载 Ryzen 5(集成 Radeon Vega 8 Graphics 显卡)的笔记本上得到的结果如下:
CL_PLATFORM_PROFILE: FULL_PROFILE
CL_PLATFORM_EXTENSIONS: cl_khr_icd cl_khr_d3d10_sharing cl_khr_d3d11_sharing cl_khr_dx9_media_sharing cl_amd_event_callback cl_amd_offline_devices
=======================================================
num of device:1
PDeviceName:gfx902
PDeviceVendor:Advanced Micro Devices, Inc.
PDriverVersion:3570.0 (PAL,HSAIL)
PProfile:FULL_PROFILE
PDeviceVersion:OpenCL 2.0 AMD-APP (3570.0)
POpenCLCVersion:OpenCL C 2.0
PDeviceExtensions:cl_khr_fp64 cl_amd_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_int64_base_atomics cl_khr_int64_extended_atomics cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_fp16 cl_khr_gl_sharing cl_amd_device_attribute_query cl_amd_vec3 cl_amd_printf cl_amd_media_ops cl_amd_media_ops2 cl_amd_popcnt cl_khr_d3d10_sharing cl_khr_d3d11_sharing cl_khr_dx9_media_sharing cl_khr_image2d_from_buffer cl_khr_subgroups cl_khr_gl_event cl_khr_depth_images cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_amd_copy_buffer_p2p cl_amd_planar_yuv
Max ComputeUnit Number:8
Max clock frequency:1201
GlobalSize Uniform Memory Size(B):7534542848
Device Global CacheLine(B):64