最近在研究如何使用CMake构建OpenCL程序,还是以数组加法为例子。该应用程序的CMake构建脚本将其构建为ISO C11应用程序,并打开了最合理的编译器警告,其CMakeLists.txt如下:
cmake_minimum_required(VERSION 3.10) # 3.10 << C_STANDARD 11

project(HelloWorld)

# Locate the OpenCL headers and library; provides the OpenCL::OpenCL target.
find_package(OpenCL REQUIRED)

add_executable(${PROJECT_NAME} HelloWorld.cpp)

target_link_libraries(${PROJECT_NAME} PRIVATE OpenCL::OpenCL)

# Copy the kernel source into the build directory unchanged, so that the
# executable can open "HelloWorld.cl" by relative path when run from there.
configure_file(HelloWorld.cl ${CMAKE_CURRENT_BINARY_DIR}/HelloWorld.cl COPYONLY)

# The C_* properties only apply to C sources. HelloWorld.cpp is compiled as
# C++, so the equivalent CXX_* properties are set as well.
set_target_properties(${PROJECT_NAME} PROPERTIES
    C_STANDARD 11
    C_STANDARD_REQUIRED ON
    C_EXTENSIONS OFF
    CXX_STANDARD 11
    CXX_STANDARD_REQUIRED ON
    CXX_EXTENSIONS OFF)

# Select the OpenCL API version exposed by the headers (100 = OpenCL 1.0).
target_compile_definitions(${PROJECT_NAME} PRIVATE CL_TARGET_OPENCL_VERSION=100)
CMakeLists.txt中一定要加上:
# Copies HelloWorld.cl into the build directory unchanged (COPYONLY).
configure_file(HelloWorld.cl ${CMAKE_CURRENT_BINARY_DIR}/HelloWorld.cl COPYONLY)
否则运行的时候找不到HelloWorld.cl(程序是在运行时才从当前目录读取该内核文件的)。
HelloWorld.cl 代码如下:
// Element-wise addition of two float arrays.
// Each work-item handles the single element selected by its global id in
// dimension 0 and stores a[i] + b[i] into result[i].
__kernel void hello_kernel(__global const float *a,
                           __global const float *b,
                           __global float *result)
{
    const int idx = get_global_id(0);
    result[idx] = a[idx] + b[idx];
}
HelloWorld.cpp代码如下:
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

///
// Constants
//
// Number of float elements in each host array and each device buffer.
const int ARRAY_SIZE = 100;

///
// Create an OpenCL context on the first available platform.
//
// A GPU context is tried first; if that fails, a CPU context is tried.
// Returns the new context, or NULL (after printing a message) when no
// platform is found or neither context type can be created.
//
cl_context CreateContext()
{
    // Pick the first platform reported by the runtime. A real application
    // would enumerate all platforms and choose the most suitable one.
    cl_platform_id platform;
    cl_uint platformCount;
    cl_int status = clGetPlatformIDs(1, &platform, &platformCount);
    if (status != CL_SUCCESS || platformCount == 0)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return NULL;
    }

    // Zero-terminated property list binding the context to that platform.
    cl_context_properties properties[] =
    {
        CL_CONTEXT_PLATFORM,
        reinterpret_cast<cl_context_properties>(platform),
        0
    };

    // Preferred: a GPU-based context.
    cl_context gpuContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU,
                                                    NULL, NULL, &status);
    if (status == CL_SUCCESS)
    {
        return gpuContext;
    }

    // Fallback: a CPU-based context.
    std::cout << "Could not create GPU context, trying CPU..." << std::endl;
    cl_context cpuContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_CPU,
                                                    NULL, NULL, &status);
    if (status == CL_SUCCESS)
    {
        return cpuContext;
    }

    std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
    return NULL;
}

///
// Create a command queue on the first device available on the
// context.
//
// context  context whose device list is queried
// device   out-parameter; receives the id of the device the queue was
//          created on (only written on success)
//
// Returns the new command queue, or NULL (after printing a message) if
// the device list cannot be obtained, is empty, or queue creation fails.
//
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
    // First get the size (in bytes) of the devices buffer.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed call to clGetContextInfo(...,CL_CONTEXT_DEVICES,...)" << std::endl;
        return NULL;
    }

    // A size smaller than one device id would give an empty list, and
    // devices[0] below would then be out of bounds.
    if (deviceBufferSize < sizeof(cl_device_id))
    {
        std::cerr << "No devices available." << std::endl;
        return NULL;
    }

    // The vector owns the device list, so every return path frees it.
    std::vector<cl_device_id> devices(deviceBufferSize / sizeof(cl_device_id));
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                              devices.size() * sizeof(cl_device_id), &devices[0], NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get device IDs" << std::endl;
        return NULL;
    }

    // In this example, we just choose the first available device. In a
    // real program, you would likely use all available devices or choose
    // the highest performance device based on OpenCL device queries.
    cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
    if (commandQueue == NULL)
    {
        std::cerr << "Failed to create commandQueue for device 0" << std::endl;
        return NULL;
    }

    *device = devices[0];
    return commandQueue;
}

///
// Create an OpenCL program from the kernel source file and build it.
//
// context   context the program is created in
// device    device whose build log is printed if the build fails
// fileName  path of the kernel source file to read
//
// Returns the built program, or NULL (after printing a message) if the
// file cannot be opened, the program cannot be created, or the build fails.
//
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Read the whole file into memory.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();

    // srcStdStr must stay alive while srcStr is in use.
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, NULL);
    if (program == NULL)
    {
        std::cerr << "Failed to create CL program from source." << std::endl;
        return NULL;
    }

    const cl_int errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Determine the reason for the error. Ask for the log size first:
        // a fixed-size buffer would be left uninitialized if the query
        // failed, e.g. because the log is larger than the buffer.
        std::string buildLog;
        size_t logSize = 0;
        if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  0, NULL, &logSize) == CL_SUCCESS && logSize > 0)
        {
            buildLog.resize(logSize);
            if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                      logSize, &buildLog[0], NULL) != CL_SUCCESS)
            {
                buildLog.clear();
            }
        }

        std::cerr << "Error in kernel: " << std::endl;
        // c_str() stops at the log's terminating NUL instead of printing it.
        std::cerr << buildLog.c_str();
        clReleaseProgram(program);
        return NULL;
    }

    return program;
}

///
// Create the device buffers used as the kernel arguments.
//
// memObjects[0] and memObjects[1] are read-only buffers initialized with
// copies of the host arrays a and b; memObjects[2] is a read-write buffer
// for the result. Each buffer holds ARRAY_SIZE floats.
//
// Returns true when all three buffers were created; otherwise prints a
// message and returns false. Buffers that were created are left in
// memObjects for the caller to release.
//
bool CreateMemObjects(cl_context context, cl_mem memObjects[3],
                      float *a, float *b)
{
    const size_t bufferBytes = sizeof(float) * ARRAY_SIZE;
    const cl_mem_flags inputFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;

    memObjects[0] = clCreateBuffer(context, inputFlags, bufferBytes, a, NULL);
    memObjects[1] = clCreateBuffer(context, inputFlags, bufferBytes, b, NULL);
    memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, bufferBytes, NULL, NULL);

    const bool allCreated = memObjects[0] != NULL
                         && memObjects[1] != NULL
                         && memObjects[2] != NULL;
    if (!allCreated)
    {
        std::cerr << "Error creating memory objects." << std::endl;
        return false;
    }

    return true;
}

///
// Release every OpenCL resource that was created.
//
// Handles that are still 0 are skipped, so this can be called from any
// point of the setup sequence. Release order: the three memory objects,
// then the command queue, kernel, program and context.
//
void Cleanup(cl_context context, cl_command_queue commandQueue,
             cl_program program, cl_kernel kernel, cl_mem memObjects[3])
{
    for (cl_mem *buffer = memObjects; buffer != memObjects + 3; ++buffer)
    {
        if (*buffer)
        {
            clReleaseMemObject(*buffer);
        }
    }

    if (commandQueue)
    {
        clReleaseCommandQueue(commandQueue);
    }

    if (kernel)
    {
        clReleaseKernel(kernel);
    }

    if (program)
    {
        clReleaseProgram(program);
    }

    if (context)
    {
        clReleaseContext(context);
    }
}

///
// main() for HelloWorld example
//
int main(int argc, char** argv)
{cl_context context = 0;cl_command_queue commandQueue = 0;cl_program program = 0;cl_device_id device = 0;cl_kernel kernel = 0;cl_mem memObjects[3] = { 0, 0, 0 };cl_int errNum;// Create an OpenCL context on first available platformcontext = CreateContext();if (context == NULL){std::cerr << "Failed to create OpenCL context." << std::endl;return 1;}// Create a command-queue on the first device available// on the created contextcommandQueue = CreateCommandQueue(context, &device);if (commandQueue == NULL){Cleanup(context, commandQueue, program, kernel, memObjects);return 1;}// Create OpenCL program from HelloWorld.cl kernel sourceprogram = CreateProgram(context, device, "HelloWorld.cl");if (program == NULL){Cleanup(context, commandQueue, program, kernel, memObjects);return 1;}// Create OpenCL kernelkernel = clCreateKernel(program, "hello_kernel", NULL);if (kernel == NULL){std::cerr << "Failed to create kernel" << std::endl;Cleanup(context, commandQueue, program, kernel, memObjects);return 1;}// Create memory objects that will be used as arguments to// kernel. First create host memory arrays that will be// used to store the arguments to the kernelfloat result[ARRAY_SIZE];float a[ARRAY_SIZE];float b[ARRAY_SIZE];for (int i = 0; i < ARRAY_SIZE; i++){a[i] = (float)i;b[i] = (float)(i * 2);}if (!CreateMemObjects(context, memObjects, a, b)){Cleanup(context, commandQueue, program, kernel, memObjects);return 1;}// Set the kernel arguments (result, a, b)errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]);errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]);errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]);if (errNum != CL_SUCCESS){std::cerr << "Error setting kernel arguments." 
<< std::endl;Cleanup(context, commandQueue, program, kernel, memObjects);return 1;}size_t globalWorkSize[1] = { ARRAY_SIZE };size_t localWorkSize[1] = { 1 };// Queue the kernel up for execution across the arrayerrNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,globalWorkSize, localWorkSize,0, NULL, NULL);if (errNum != CL_SUCCESS){std::cerr << "Error queuing kernel for execution." << std::endl;Cleanup(context, commandQueue, program, kernel, memObjects);return 1;}// Read the output buffer back to the HosterrNum = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE,0, ARRAY_SIZE * sizeof(float), result,0, NULL, NULL);if (errNum != CL_SUCCESS){std::cerr << "Error reading result buffer." << std::endl;Cleanup(context, commandQueue, program, kernel, memObjects);return 1;}// Output the result bufferfor (int i = 0; i < ARRAY_SIZE; i++){std::cout << result[i] << " ";}std::cout << std::endl;std::cout << "Executed program succesfully." << std::endl;Cleanup(context, commandQueue, program, kernel, memObjects);return 0;
}
代码结构如下:
HelloWorld文件夹目录下有以下三个文件
--------CMakeLists.txt
--------HelloWorld.cl
--------HelloWorld.cpp
在命令终端中 cd 到 HelloWorld 目录下:
mkdir build 创建build文件夹
cd 到build文件目录下:
然后输入: cmake ../
cmake ../
命令终端输出如下:
-- The C compiler identification is GNU 7.5.0
-- The CXX compiler identification is GNU 7.5.0
-- Check for working C compiler: /usr/bin/cc
-- Check for working C compiler: /usr/bin/cc -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: /usr/bin/c++
-- Check for working CXX compiler: /usr/bin/c++ -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Looking for CL_VERSION_2_2
-- Looking for CL_VERSION_2_2 - found
-- Found OpenCL: /usr/lib/libOpenCL.so (found version "2.2")
-- Configuring done
-- Generating done
-- Build files have been written to: /data/lost+found/clproject1/opencl-book-samples/src/Chapter_2/HelloWorld/build
接着输入 make 命令:
make
输出结果如下:
[ 50%] Building CXX object CMakeFiles/HelloWorld.dir/HelloWorld.cpp.o
[100%] Linking CXX executable HelloWorld
[100%] Built target HelloWorld
这样就编译成功了,直接在命令行中输入:
./HelloWorld
运行成功后结果如下:
0 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 48 51 54 57 60 63 66 69 72 75 78 81 84 87 90 93 96 99 102 105 108 111 114 117 120 123 126 129 132 135 138 141 144 147 150 153 156 159 162 165 168 171 174 177 180 183 186 189 192 195 198 201 204 207 210 213 216 219 222 225 228 231 234 237 240 243 246 249 252 255 258 261 264 267 270 273 276 279 282 285 288 291 294 297
Executed program succesfully
参考文献:《OpenCL编程指南》