文章目录
- 0. 引言
- 1. 实现功能概述
- 2. 完整代码
- 3. 代码解释
- 3.1 CUDA 核心逻辑
- 3.2 主机端逻辑
- 3.3 OpenCV 图像处理
- 4. 编译与运行
0. 引言
本文将以一个简单的例子展示如何使用 CUDA 将 RGB 图像转换为灰度图,并结合 OpenCV 完成图像的加载与保存。
1. 实现功能概述
我们要实现以下功能:
- 使用 CUDA 内核,将每个像素从 RGBA 格式转换为灰度值。
- 灰度化公式为:`Gray = 0.299 * R + 0.587 * G + 0.114 * B`。
- 使用 OpenCV 进行图像加载和保存处理。
代码分为三个部分:
- CUDA 核心逻辑:负责灰度化转换的 CUDA 内核实现。
- 主机端逻辑:完成内存管理、数据传递以及对 CUDA 内核的调用。
- OpenCV 图像操作:用于读取和保存图像文件。
2. 完整代码
// rgba_to_greyscale.h
#ifndef RGBA_TO_GREYSCALE_H_
#define RGBA_TO_GREYSCALE_H_

#include <cstdint>

// Callback function type for handling the output grey image.
// Parameters:
// - height: Image height in pixels.
// - width: Image width in pixels.
// - h_grey_image: Host buffer holding height * width grey values, one byte
//   per pixel. The buffer is released as soon as the callback returns, so the
//   callback must copy anything it wants to keep.
typedef void (*CallbackFun)(int32_t height, int32_t width, uint8_t* h_grey_image);

// Converts an RGBA image to greyscale using CUDA.
// Parameters:
// - height: Image height.
// - width: Image width.
// - data: Pointer to RGBA image data (4 bytes per pixel, height * width pixels).
// - callback: Callback function to handle the output grey image. May be
//   nullptr, in which case the converted image is discarded.
// Returns 0 on success and -1 on any failure.
int32_t RgbaToGreyscale(int32_t height, int32_t width, uint8_t* data, CallbackFun callback = nullptr);

#endif  // RGBA_TO_GREYSCALE_H_
// rgba_to_greyscale.cu
#include "rgba_to_greyscale.h"

#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <iostream>
#include <cstring>

namespace {

// CUDA kernel for RGBA to greyscale conversion.
//
// Each thread converts at most one pixel with
//   Gray = 0.299 * R + 0.587 * G + 0.114 * B
// The result is truncated (not rounded) to uint8_t; the alpha channel is ignored.
//
// Launch layout: 1-D grid of 2-D blocks. The flat pixel index is
//   blockIdx.x * (blockDim.x * blockDim.y) + threadIdx.y * blockDim.x + threadIdx.x
// so the grid has to provide at least num_rows * num_cols threads. Surplus
// threads in the last block are filtered out by the bounds check.
// No shared memory is used.
//
// Parameters:
// - rgba_image: num_rows * num_cols input pixels.
// - grey_image: num_rows * num_cols output bytes.
// - num_rows, num_cols: Image height and width in pixels.
__global__ void RgbaToGreyscaleKernel(const uchar4* __restrict__ rgba_image,
                                      uint8_t* __restrict__ grey_image,
                                      int32_t num_rows, int32_t num_cols) {
  // 64-bit arithmetic: both the pixel count and the flat index can exceed the
  // int32_t range for very large images.
  const int64_t num_pixels = static_cast<int64_t>(num_rows) * num_cols;
  const int64_t threads_in_block = static_cast<int64_t>(blockDim.x) * blockDim.y;
  const int64_t id = static_cast<int64_t>(blockIdx.x) * threads_in_block +
                     threadIdx.y * blockDim.x + threadIdx.x;
  if (id < num_pixels) {
    const uchar4 pixel = rgba_image[id];  // x = R, y = G, z = B, w = A (unused)
    grey_image[id] =
        static_cast<uint8_t>(0.299f * pixel.x + 0.587f * pixel.y + 0.114f * pixel.z);
  }
}

}  // namespace

// Converts an RGBA image to greyscale on the GPU.
//
// Copies `data` to the device, runs RgbaToGreyscaleKernel, copies the result
// into a pinned host buffer and passes that buffer to `callback` (if any).
// The buffer is released right after the callback returns.
//
// Parameters:
// - height, width: Image size in pixels; both must be positive.
// - data: height * width RGBA pixels, 4 bytes per pixel.
// - callback: Receives the grey image; may be nullptr.
// Returns 0 on success, -1 on invalid arguments or on any CUDA failure.
int32_t RgbaToGreyscale(int32_t height, int32_t width, uint8_t* data, CallbackFun callback) {
  if (data == nullptr) {
    std::cerr << "Input data is null." << std::endl;
    return -1;
  }
  // A zero-pixel image would produce a zero-block grid (invalid launch
  // configuration) and negative sizes would wrap to huge allocation requests.
  if (height <= 0 || width <= 0) {
    std::cerr << "Invalid image size: " << width << " x " << height << "." << std::endl;
    return -1;
  }

  // Widen before multiplying so that large images cannot overflow int32_t.
  const int64_t num_pixels = static_cast<int64_t>(height) * width;
  const size_t rgba_bytes = sizeof(uchar4) * static_cast<size_t>(num_pixels);
  const size_t grey_bytes = sizeof(uint8_t) * static_cast<size_t>(num_pixels);

  const int32_t threads_per_block = 16;  // block is threads_per_block x threads_per_block
  const int64_t threads_in_block = static_cast<int64_t>(threads_per_block) * threads_per_block;
  const int64_t num_blocks = (num_pixels + threads_in_block - 1) / threads_in_block;  // ceil-div
  // Keep the block count within the 2^31 - 1 limit of gridDim.x so that the
  // conversion to dim3 below cannot silently truncate.
  if (num_blocks > INT32_MAX) {
    std::cerr << "Image is too large for a single kernel launch." << std::endl;
    return -1;
  }

  uchar4* d_rgba_image = nullptr;
  uint8_t* d_grey_image = nullptr;
  uint8_t* h_grey_image = nullptr;

  // Single cleanup path shared by every exit; pointers that were never
  // allocated are still nullptr and are skipped.
  const auto release_buffers = [&]() {
    cudaFree(d_rgba_image);  // cudaFree(nullptr) is a no-op
    cudaFree(d_grey_image);
    if (h_grey_image != nullptr) {
      cudaFreeHost(h_grey_image);
    }
  };
  const auto fail = [&](const char* message, cudaError_t error) -> int32_t {
    std::cerr << message << " CUDA error: " << cudaGetErrorString(error) << std::endl;
    release_buffers();
    return -1;
  };

  cudaError_t status = cudaMalloc(&d_rgba_image, rgba_bytes);
  if (status != cudaSuccess) {
    return fail("Failed to allocate device memory for RGBA image.", status);
  }

  status = cudaMalloc(&d_grey_image, grey_bytes);
  if (status != cudaSuccess) {
    return fail("Failed to allocate device memory for greyscale image.", status);
  }

  // Plain pinned memory is enough: the buffer is only a cudaMemcpy target and
  // is never accessed from the device through a mapped pointer.
  status = cudaHostAlloc(&h_grey_image, grey_bytes, cudaHostAllocDefault);
  if (status != cudaSuccess) {
    return fail("Failed to allocate host memory for greyscale image.", status);
  }

  status = cudaMemcpy(d_rgba_image, data, rgba_bytes, cudaMemcpyHostToDevice);
  if (status != cudaSuccess) {
    return fail("Failed to copy RGBA image data to device.", status);
  }

  const dim3 block_size(threads_per_block, threads_per_block);
  const dim3 grid_size(static_cast<unsigned int>(num_blocks));
  RgbaToGreyscaleKernel<<<grid_size, block_size>>>(d_rgba_image, d_grey_image, height, width);

  // Launch-configuration errors are only reported through cudaGetLastError().
  status = cudaGetLastError();
  if (status != cudaSuccess) {
    return fail("Failed to launch CUDA kernel.", status);
  }
  // Errors raised while the kernel runs surface at the next synchronisation.
  status = cudaDeviceSynchronize();
  if (status != cudaSuccess) {
    return fail("CUDA kernel execution failed.", status);
  }

  status = cudaMemcpy(h_grey_image, d_grey_image, grey_bytes, cudaMemcpyDeviceToHost);
  if (status != cudaSuccess) {
    return fail("Failed to copy greyscale image data to host.", status);
  }

  if (callback != nullptr) {
    callback(height, width, h_grey_image);
  }

  release_buffers();
  return 0;
}
// main.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <iostream>
#include <string>
#include <cstdint>

#include "rgba_to_greyscale.h"

namespace {

// Set by SaveGreyscaleImage. The callback type returns void, so this flag is
// how main() learns whether the output file was actually written.
bool g_image_saved = false;

// Saves the greyscale image to a file.
// Parameters:
// - height, width: Image size in pixels.
// - h_grey_image: height * width grey values, one byte per pixel.
void SaveGreyscaleImage(int32_t height, int32_t width, uint8_t* h_grey_image) {
  const std::string output_file = "out.png";
  // View over the caller's buffer; it is written out before the callback returns.
  cv::Mat out_image(height, width, CV_8UC1, static_cast<void*>(h_grey_image));
  g_image_saved = cv::imwrite(output_file, out_image);
  if (!g_image_saved) {
    std::cerr << "Failed to write the output image: " << output_file << std::endl;
  }
}

}  // namespace

// Loads src.png, converts it to greyscale on the GPU and writes out.png.
// Returns 0 on success and -1 on any failure.
int main() {
  const std::string input_file = "src.png";
  cv::Mat image = cv::imread(input_file, cv::IMREAD_COLOR);
  if (image.empty()) {
    std::cerr << "Failed to read input image: " << input_file << std::endl;
    return -1;
  }

  cv::Mat image_rgba;
  cv::cvtColor(image, image_rgba, cv::COLOR_BGR2RGBA);
  // RgbaToGreyscale copies rows * cols * 4 consecutive bytes, so the rows must
  // be stored back to back without padding.
  if (!image_rgba.isContinuous()) {
    image_rgba = image_rgba.clone();
  }

  if (RgbaToGreyscale(image_rgba.rows, image_rgba.cols, image_rgba.ptr<uint8_t>(),
                      SaveGreyscaleImage) != 0) {
    std::cerr << "Failed to convert image to greyscale." << std::endl;
    return -1;
  }
  // The conversion can succeed while writing the file fails; the callback has
  // already reported the error in that case.
  if (!g_image_saved) {
    return -1;
  }

  std::cout << "Converted greyscale image saved successfully." << std::endl;
  return 0;
}
3. 代码解释
3.1 CUDA 核心逻辑
CUDA 内核函数实现每个线程对一个像素的灰度化处理。以下是核心实现代码:
// CUDA kernel for RGBA to greyscale conversion.
//
// Each thread converts at most one pixel with
//   Gray = 0.299 * R + 0.587 * G + 0.114 * B
// The result is truncated (not rounded) to uint8_t; the alpha channel is ignored.
//
// Launch layout: 1-D grid of 2-D blocks. The flat pixel index is
//   blockIdx.x * (blockDim.x * blockDim.y) + threadIdx.y * blockDim.x + threadIdx.x
// so the grid has to provide at least num_rows * num_cols threads. Surplus
// threads in the last block are filtered out by the bounds check.
// No shared memory is used.
//
// Parameters:
// - rgba_image: num_rows * num_cols input pixels.
// - grey_image: num_rows * num_cols output bytes.
// - num_rows, num_cols: Image height and width in pixels.
__global__ void RgbaToGreyscaleKernel(const uchar4* __restrict__ rgba_image,
                                      uint8_t* __restrict__ grey_image,
                                      int32_t num_rows, int32_t num_cols) {
  // 64-bit arithmetic: both the pixel count and the flat index can exceed the
  // int32_t range for very large images.
  const int64_t num_pixels = static_cast<int64_t>(num_rows) * num_cols;
  const int64_t threads_in_block = static_cast<int64_t>(blockDim.x) * blockDim.y;
  const int64_t id = static_cast<int64_t>(blockIdx.x) * threads_in_block +
                     threadIdx.y * blockDim.x + threadIdx.x;
  if (id < num_pixels) {
    const uchar4 pixel = rgba_image[id];  // x = R, y = G, z = B, w = A (unused)
    grey_image[id] =
        static_cast<uint8_t>(0.299f * pixel.x + 0.587f * pixel.y + 0.114f * pixel.z);
  }
}
说明:
- 每个线程计算图像中的一个像素点,提升了并行处理效率。
- 输入参数 `rgba_image` 是 RGBA 格式的像素数据,输出参数 `grey_image` 是灰度图像数据。
3.2 主机端逻辑
主机端负责完成 CUDA 内存管理、内核调用以及数据传输。以下是代码实现:
// Converts an RGBA image to greyscale on the GPU.
//
// Copies `data` to the device, runs RgbaToGreyscaleKernel, copies the result
// into a pinned host buffer and passes that buffer to `callback` (if any).
// The buffer is released right after the callback returns.
//
// Parameters:
// - height, width: Image size in pixels; both must be positive.
// - data: height * width RGBA pixels, 4 bytes per pixel.
// - callback: Receives the grey image; may be nullptr.
// Returns 0 on success, -1 on invalid arguments or on any CUDA failure.
int32_t RgbaToGreyscale(int32_t height, int32_t width, uint8_t* data, CallbackFun callback) {
  if (data == nullptr) {
    std::cerr << "Input data is null." << std::endl;
    return -1;
  }
  // A zero-pixel image would produce a zero-block grid (invalid launch
  // configuration) and negative sizes would wrap to huge allocation requests.
  if (height <= 0 || width <= 0) {
    std::cerr << "Invalid image size: " << width << " x " << height << "." << std::endl;
    return -1;
  }

  // Widen before multiplying so that large images cannot overflow int32_t.
  const int64_t num_pixels = static_cast<int64_t>(height) * width;
  const size_t rgba_bytes = sizeof(uchar4) * static_cast<size_t>(num_pixels);
  const size_t grey_bytes = sizeof(uint8_t) * static_cast<size_t>(num_pixels);

  const int32_t threads_per_block = 16;  // block is threads_per_block x threads_per_block
  const int64_t threads_in_block = static_cast<int64_t>(threads_per_block) * threads_per_block;
  const int64_t num_blocks = (num_pixels + threads_in_block - 1) / threads_in_block;  // ceil-div
  // Keep the block count within the 2^31 - 1 limit of gridDim.x so that the
  // conversion to dim3 below cannot silently truncate.
  if (num_blocks > INT32_MAX) {
    std::cerr << "Image is too large for a single kernel launch." << std::endl;
    return -1;
  }

  uchar4* d_rgba_image = nullptr;
  uint8_t* d_grey_image = nullptr;
  uint8_t* h_grey_image = nullptr;

  // Single cleanup path shared by every exit; pointers that were never
  // allocated are still nullptr and are skipped.
  const auto release_buffers = [&]() {
    cudaFree(d_rgba_image);  // cudaFree(nullptr) is a no-op
    cudaFree(d_grey_image);
    if (h_grey_image != nullptr) {
      cudaFreeHost(h_grey_image);
    }
  };
  const auto fail = [&](const char* message, cudaError_t error) -> int32_t {
    std::cerr << message << " CUDA error: " << cudaGetErrorString(error) << std::endl;
    release_buffers();
    return -1;
  };

  cudaError_t status = cudaMalloc(&d_rgba_image, rgba_bytes);
  if (status != cudaSuccess) {
    return fail("Failed to allocate device memory for RGBA image.", status);
  }

  status = cudaMalloc(&d_grey_image, grey_bytes);
  if (status != cudaSuccess) {
    return fail("Failed to allocate device memory for greyscale image.", status);
  }

  // Plain pinned memory is enough: the buffer is only a cudaMemcpy target and
  // is never accessed from the device through a mapped pointer.
  status = cudaHostAlloc(&h_grey_image, grey_bytes, cudaHostAllocDefault);
  if (status != cudaSuccess) {
    return fail("Failed to allocate host memory for greyscale image.", status);
  }

  status = cudaMemcpy(d_rgba_image, data, rgba_bytes, cudaMemcpyHostToDevice);
  if (status != cudaSuccess) {
    return fail("Failed to copy RGBA image data to device.", status);
  }

  const dim3 block_size(threads_per_block, threads_per_block);
  const dim3 grid_size(static_cast<unsigned int>(num_blocks));
  RgbaToGreyscaleKernel<<<grid_size, block_size>>>(d_rgba_image, d_grey_image, height, width);

  // Launch-configuration errors are only reported through cudaGetLastError().
  status = cudaGetLastError();
  if (status != cudaSuccess) {
    return fail("Failed to launch CUDA kernel.", status);
  }
  // Errors raised while the kernel runs surface at the next synchronisation.
  status = cudaDeviceSynchronize();
  if (status != cudaSuccess) {
    return fail("CUDA kernel execution failed.", status);
  }

  status = cudaMemcpy(h_grey_image, d_grey_image, grey_bytes, cudaMemcpyDeviceToHost);
  if (status != cudaSuccess) {
    return fail("Failed to copy greyscale image data to host.", status);
  }

  if (callback != nullptr) {
    callback(height, width, h_grey_image);
  }

  release_buffers();
  return 0;
}
主要功能:
- 将图像数据从主机传输到设备。
- 调用 CUDA 内核执行灰度化处理。
- 将处理结果从设备传回主机,并调用回调函数保存图像。
3.3 OpenCV 图像处理
主程序中使用 OpenCV 加载原始图像,将其转换为 RGBA 格式,随后调用 `RgbaToGreyscale` 完成灰度化处理,最后保存结果图像:
// Saves the greyscale image to a file.
void SaveGreyscaleImage(int32_t height, int32_t width, uint8_t* h_grey_image) {const std::string output_file = "out.png";cv::Mat out_image(height, width, CV_8UC1, static_cast<void*>(h_grey_image));if (!cv::imwrite(output_file, out_image)) {std::cerr << "Failed to write the output image: " << output_file << std::endl;}
}} // namespaceint32_t main() {const std::string input_file = "src.png";cv::Mat image = cv::imread(input_file, cv::IMREAD_COLOR);if (image.empty()) {std::cerr << "Failed to read input image: " << input_file << std::endl;return -1;}cv::Mat image_rgba;cv::cvtColor(image, image_rgba, cv::COLOR_BGR2RGBA);if (RgbaToGreyscale(image_rgba.rows, image_rgba.cols, image_rgba.ptr<uint8_t>(), SaveGreyscaleImage) != 0) {std::cerr << "Failed to convert image to greyscale." << std::endl;return -1;}std::cout << "Converted greyscale image saved successfully." << std::endl;return 0;
}
OpenCV 功能说明:
- `cv::imread`:加载输入图像。
- `cv::cvtColor`:将图像从 BGR 转换为 RGBA。
- `cv::imwrite`:保存灰度化处理后的图像。
4. 编译与运行
编译命令:
# cv::imread and cv::imwrite are implemented in the imgcodecs module (OpenCV 3 and later), so it is linked explicitly.
nvcc -o rgba_to_greyscale main.cpp rgba_to_greyscale.cu -lopencv_core -lopencv_imgcodecs -lopencv_highgui -lopencv_imgproc
运行命令:
将输入图片命名为 `src.png`,运行程序:
./rgba_to_greyscale
程序执行完成后,灰度图像将保存为 `out.png`。