欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

高通平台 msm8953一个可以执行的opencl的例子

程序员文章站 2022-07-12 21:37:12
...

在网上找了一些例子,测试后发现都无法运行。下面给出一个在msm8953上测试通过、可以运行的OpenCL实例。这个例子是在别人代码的基础上修改而来的,具体代码如下:
1.Android.mk
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/Android.mk

# Android.mk for the "opencl" sample: builds main.cpp into an executable
# module named "opencl" that is linked with -lOpenCL.
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE_TAGS := optional
LOCAL_MODULE    := opencl
# main.cpp is the only source file compiled into this module.
LOCAL_SRC_FILES := main.cpp
# Header search paths: this directory, plus the adreno include directory under
# the intermediates output (presumably where CL/cl.h is picked up from; see the
# example path below -- confirm for your tree).
LOCAL_C_INCLUDES := $(LOCAL_PATH)
LOCAL_C_INCLUDES += $(TARGET_OUT_INTERMEDIATES)/include/adreno/
# Example locations of the OpenCL header and shared library in an msm8953_64 tree:
#./vendor/qcom/proprietary/prebuilt_HY11/target/product/msm8953_64/obj/include/adreno/CL/cl.h
#./vendor/qcom/proprietary/prebuilt_HY11/target/product/msm8953_64/system/vendor/lib64/libOpenCL.so
# Pass -lOpenCL to the linker so the OpenCL API calls in main.cpp resolve.
LOCAL_LDFLAGS += -lOpenCL
include $(BUILD_EXECUTABLE)

2.cl_kernel2.cl
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/cl_kernel2.cl

/*
 * cl_add: element-wise integer addition.
 *
 * Each work-item handles the single element selected by its global id in
 * dimension 0: it reads that element from src1 and src2 and stores their sum
 * at the same position in dst.
 */
__kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
{
    const int gid = get_global_id(0);
    const int sum = src1[gid] + src2[gid];

    dst[gid] = sum;
}

3.main.cpp
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/main.cpp

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "readyuv.h"
#include "CL/cl.h"
//#include "CL/cl_platform.h"
#include <iostream>
using namespace std;
int main(void)
{
    const int  array_size = 1024;
    cl_uint numPlatforms = 0;
    cl_platform_id platform = nullptr;
    cl_context context = nullptr;
    cl_command_queue commandQueue = nullptr;
    cl_program program = nullptr;
    cl_mem input1MemObj = nullptr;
    cl_mem input2MemObj = nullptr;
    cl_mem outputMemObj = nullptr;
    cl_kernel kernel = nullptr;
    //step1. 查询OpenCL平台集合
    /*
     * 这个函数一般被调用两次:
     *   第一次调用这个函数是获得可用平台的数目, 然后为平台对象分配内存空间
     *   第二次调用用来获取平台对象
     */
    cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (status != CL_SUCCESS)
    {
        cout << "Error: Getting platforms!" << endl;
        return -1;
    }
    if (numPlatforms > 0)
    {
        cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms* sizeof(cl_platform_id));
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        platform = platforms[0];
    }
    else
    {
        puts("Your system does not have any OpenCL platform!");
        return -1;
    }

    cl_uint        numDevices = 0;
    cl_device_id   *devices;
    cl_int errcode_ret ;

    //setp2.获取一个平台上的可用设备清单
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
    if (numDevices == 0) //no GPU available.        
    {
        cout << "No GPU device available." << endl;
        cout << "Choose CPU as default device." << endl;
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
        devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
    }
    else
    {
        devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
        cout << "The number of devices: " << numDevices << endl;
    }
    //step3.创建一个OpenCL上下文
    /*
    *上下文是 OpenCL runtime 用来管理像命令队列、内存队列、程序对象和内核对象,并且在上下文中指定的一个或多个设备上执行内核函数。
    */
    context = clCreateContext(NULL, 1, devices, NULL, NULL, &errcode_ret);
    if(errcode_ret == CL_SUCCESS)
        cout << "craet OpenCL runtime sucess." << endl;
    else
        return -1;
    //step4. 创建命令队列
    commandQueue = clCreateCommandQueue(context, devices[0], 0, &errcode_ret);
    if(errcode_ret == CL_SUCCESS)
        cout << "craet CommandQueue sucess." << endl;
    else
        return -1;

    char *kernelCodeBuffer = nullptr;
    char *Buffer = nullptr;
    const char *aSource = nullptr;
    size_t kernelLength = 0;
    size_t global_work_size[1] = { array_size };

    // Read the kernel code to the buffer
    FILE *fp = fopen("cl_kernel2.cl", "rb");
    if (fp == nullptr)
    {
        puts("The kernel file not found!");
        goto RELEASE_RESOURCES;
    }
    fseek(fp, 0, SEEK_END);
    kernelLength = ftell(fp);
    cout << "kernelLength : " << kernelLength << endl;
    fseek(fp, 0, SEEK_SET);
    kernelCodeBuffer = (char*)malloc(kernelLength + 1);
    Buffer = (char*)malloc(kernelLength + 1);
    fread(kernelCodeBuffer, 1, kernelLength, fp);
    kernelCodeBuffer[kernelLength] = '\0';
    fclose(fp);
    aSource = kernelCodeBuffer;
    strcpy(Buffer,aSource);
    cout << "kernelCodeBuffer : " << Buffer  << endl;

    //step5. 创建程序对象
    program = clCreateProgramWithSource(context, 1, &aSource, &kernelLength, NULL);

    //step6. 编译程序对象
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);


    // Do initialization
    int i;
    int input1Buffer[array_size];
    int input2Buffer[array_size];
    int outputBuffer[array_size];
    for (i = 0; i < array_size; i++)
        input1Buffer[i] = input2Buffer[i] = i + 1;
    memset(outputBuffer, 0, sizeof(outputBuffer));

    // Create mmory object
    input1MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, array_size * sizeof(int), input1Buffer, nullptr);
    input2MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, array_size * sizeof(int), input2Buffer, nullptr);
    outputMemObj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, array_size * sizeof(int), NULL, NULL);

    //step7.创建内核对象
    kernel = clCreateKernel(program, "cl_add", NULL);

    //step8.设置内核参数,想要执行内核,就必须设置内核参数
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&outputMemObj);
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&input1MemObj);
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&input2MemObj);

    status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
/*
 * clEnqueueNDRangeKernel(
 *
 * cl_command_queue queue,
 *
 * cl_kernel kernel,
 *
 * cl_uint work_dims,
 *  //if you deal with image object, you should probably set work_dims equal 2 or 3. But for buffer objects, you can set whatever dimensionality you think best.
 *  //For a buffer object containing a two-dimensional matrix, you might set work-dims equal 2.
 *
 * const size_t *global_work_offset,
 *  //the global ID offset in each dimension
 *
 * const size_t *global_work_size, 
 *  //the number of work items in each dimension
 * const size_t *local_work_size,
 *  //the number of work_items in a work_group,in each dimension
 * cl_uint num_events,
 *
 * const cl_event *wait_list,
 *
 * cl_event *event)
 */
/*
 * __kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
 * {
 *     int index = get_global_id(0);//get_global_id获取线程索引,其取值范围是0 到 global_work_size - 1
 *     dst[index] = src1[index] + src2[index];
 * }        
 */
    clFinish(commandQueue);

    //GPU中执行完成后需要Copy结果到内存
    status = clEnqueueReadBuffer(commandQueue, outputMemObj, CL_TRUE, 0, global_work_size[0] * sizeof(int), outputBuffer, 0, NULL, NULL);

    printf("Veryfy the rsults... ");
    for (i = 0; i < array_size; i++)
    {
        if (outputBuffer[i] != (i + 1) * 2)
        {
            puts("Results not correct!");
            break;
        }
    }
    if (i == array_size)
        puts("Correct!");

RELEASE_RESOURCES:

    status = clReleaseKernel(kernel);//*Release kernel.
    status = clReleaseProgram(program);    //Release the program object.
    status = clReleaseMemObject(input1MemObj);//Release mem object.
    status = clReleaseMemObject(input2MemObj);
    status = clReleaseMemObject(outputMemObj);
    status = clReleaseCommandQueue(commandQueue);//Release  Command queue.
    status = clReleaseContext(context);//Release context.
    free(devices);
    getchar();
    return 0;
}
/*
 * clGetPlatformIDs---------------------------获取平台ID
 *
 * clGetDeviceIDs-----------------------------获取设备ID
 *
 * clCreateContext----------------------------创建上下文
 *
 * clCreateCommandQueue-----------------------创建命令队列
 *
 * clCreateBuffer-----------------------------创建设备内存
 *
 * clCreateProgramWithSource------------------创建程序
 *
 * clBuildProgram-----------------------------编译程序
 *
 * clGetProgramBuildInfo----------------------获取编译信息
 *
 * clCreateKernel-----------------------------创建核
 *
 * clSetKernelArg-----------------------------设置核参数
 *
 * clEnqueueNDRangeKernel---------------------执行核
 *
 * clEnqueueReadBuffer------------------------读取设备内存
 *
 * clReleaseMemObject-------------------------释放内存对象
 *
 * clReleaseKernel----------------------------释放核
 *
 * clReleaseCommandQueue----------------------释放命令队列
 *
 * clReleaseContext---------------------------释放上下文
 */

执行编译命令后验证方法如下
一、把文件推送到平台端
1.adb root
2.adb remount
3.adb push out\target\product\msm8953_64\system\bin\opencl data
4.adb push vendor\qcom\proprietary\mm-camera\mm-camera2\opencl\cl_kernel2.cl data
二、执行命令
1 ) chmod 777 data/opencl
2 ) cd data/ (若不执行此步骤,程序在当前目录下找不到cl_kernel2.cl,会报如下错误:The kernel file not found!)
3 ) ./opencl

执行完后可得成功log如下

The number of devices: 1
craet OpenCL runtime sucess.
craet CommandQueue sucess.
kernelLength : 162
kernelCodeBuffer : __kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
{
    int index = get_global_id(0);
    dst[index] = src1[index] + src2[index];
}

Veryfy the rsults... Correct!