How do I use OpenCL from C++?

OpenCL (Open Computing Language) is a framework designed for writing programs that execute across heterogeneous platforms, including CPUs, GPUs, and other processors. Using OpenCL from C++ enables you to harness parallel computing power for high-performance applications.

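The following example uses the OpenCL C API from a C++ program to add two 1024-element vectors on a GPU. It selects a platform and a GPU device, creates a context and a command queue, copies the inputs into device buffers, builds and launches the kernel, reads the result back, and finally releases every OpenCL object: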

#include #include const char *kernelSource = "__kernel void vecAdd(__global const float *a, __global const float *b, __global float *c) {" " int id = get_global_id(0);" " c[id] = a[id] + b[id];" "}"; int main() { const int arraySize = 1024; float a[arraySize], b[arraySize], c[arraySize]; for (int i = 0; i < arraySize; i++) { a[i] = static_cast(i); b[i] = static_cast(i); } cl_platform_id platform; clGetPlatformIDs(1, &platform, NULL); cl_device_id device; clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL); cl_mem aBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, arraySize * sizeof(float), NULL, NULL); cl_mem bBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, arraySize * sizeof(float), NULL, NULL); cl_mem cBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, arraySize * sizeof(float), NULL, NULL); clEnqueueWriteBuffer(queue, aBuffer, CL_TRUE, 0, arraySize * sizeof(float), a, 0, NULL, NULL); clEnqueueWriteBuffer(queue, bBuffer, CL_TRUE, 0, arraySize * sizeof(float), b, 0, NULL, NULL); cl_program program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, NULL); clBuildProgram(program, 1, &device, NULL, NULL, NULL); cl_kernel kernel = clCreateKernel(program, "vecAdd", NULL); clSetKernelArg(kernel, 0, sizeof(cl_mem), &aBuffer); clSetKernelArg(kernel, 1, sizeof(cl_mem), &bBuffer); clSetKernelArg(kernel, 2, sizeof(cl_mem), &cBuffer); size_t globalSize = arraySize; clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, NULL, 0, NULL, NULL); clEnqueueReadBuffer(queue, cBuffer, CL_TRUE, 0, arraySize * sizeof(float), c, 0, NULL, NULL); for (int i = 0; i < 10; i++) { std::cout << c[i] << " "; // Output first 10 results } clReleaseMemObject(aBuffer); clReleaseMemObject(bBuffer); clReleaseMemObject(cBuffer); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(queue); 
clReleaseContext(context); return 0; }

Keywords: OpenCL, C++, parallel computing, heterogeneous platforms, kernel programming