mesytec-mnode/external/taskflow-3.8.0/3rd-party/ff/ocl/clEnvironment.hpp
2025-01-04 01:25:05 +01:00

542 lines
17 KiB
C++

/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*!
* \file clEnvironment.hpp
* \ingroup aux_classes
*
* \brief This file includes the basic support for OpenCL platforms
*
* Realises a singleton class that keeps the status of the OpenCL platform and
* creates contexts, command queues, etc.
*/
/* ***************************************************************************
*
* FastFlow is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 3 as
* published by the Free Software Foundation.
* Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
* or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
****************************************************************************
*/
/*
* Mehdi Goli: m.goli@rgu.ac.uk goli.mehdi@gmail.com
* Massimo Torquati: torquati@di.unipi.it
* Marco Aldinucci: aldinuc@di.unito.it
*
*/
#ifndef FF_OCLENVIRONMENT_HPP
#define FF_OCLENVIRONMENT_HPP
#if defined(FF_OPENCL)
// to avoid deprecated warnings
#if !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS 1
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS 1
#endif
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#include <pthread.h>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream> // FIX: check if it is possible to remove this include
#include <sstream>
#include <map>
#include <vector>
//#include <ff/atomic/atomic.h>
#include <atomic>
namespace ff {
// Mutex locked by clEnvironment around singleton creation (instance()) and around the
// device-allocation bookkeeping (coAllocateGPUDeviceRR(), getCPUDevice()).
static pthread_mutex_t instanceMutex = PTHREAD_MUTEX_INITIALIZER;
/*!
 * \struct oclParameter
 *
 * \brief Bundle of the OpenCL handles kept for one device: the device id,
 * a context and a command queue.
 *
 * The constructor only records the device id; \p context and \p commandQueue
 * start out as NULL (instead of indeterminate values) and are expected to be
 * assigned by whoever creates the object.
 */
struct oclParameter {
    oclParameter(cl_device_id d_id):d_id(d_id), context(NULL), commandQueue(NULL) {}
    cl_device_id d_id;             //!< device these handles belong to
    cl_context context;            //!< NULL until assigned
    cl_command_queue commandQueue; //!< NULL until assigned
};
/*!
* \class clEnvironment
* \ingroup aux_classes
*
* \brief OpenCL platform inspection and setup
*
* \note Multiple platforms are not managed. Platforms[0] is always adopted. Support for multiple
* platforms will be implemented if needed.
*
*/
class clEnvironment {
private:
cl_platform_id *platforms;
cl_uint numPlatforms;
cl_uint numDevices;
//cl_device_id* devlist_for_platform;
cl_device_id* deviceIds;
protected:
clEnvironment(): platforms(NULL), numPlatforms(0),lastAssigned(0) {
oclId=0;
// FIX: what is this ???
#if defined(FF_GPUCOMPONETS)
numGPU=FF_GPUCOMPONETS;
#else
numGPU=10000;
#endif
clGetPlatformIDs(0, NULL, &numPlatforms);
assert(numPlatforms>0);
platforms = new cl_platform_id[numPlatforms];
assert(platforms);
clGetPlatformIDs(numPlatforms, platforms, NULL);
#ifdef FF_OPENCL_LOG
if (numPlatforms>1) {
printf("Multiple OpenCL platforms detected. Experimental code\n");
}
#endif
for (unsigned int i = 0; i< numPlatforms; ++i) {
clGetDeviceIDs(platforms[i],CL_DEVICE_TYPE_ALL,0,NULL,&(numDevices));
deviceIds = new cl_device_id[numDevices];
assert(deviceIds);
// Fill in CLDevice with clGetDeviceIDs()
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL,numDevices,deviceIds,NULL);
//std::cerr << "OpenCL platform detection - begin\n";
for(size_t j=0; j<numDevices; j++) {
// estimating max number of thread per device
cl_bool b;
cl_context context;
cl_int status;
cl_device_type dt;
clGetDeviceInfo(deviceIds[j], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &(b), NULL);
context = clCreateContext(NULL,1,&deviceIds[j],NULL,NULL,&status);
clGetDeviceInfo(deviceIds[j], CL_DEVICE_TYPE, sizeof(cl_device_type), &(dt), NULL);
// if((dt) & CL_DEVICE_TYPE_GPU)
// std::cerr << "#" << j << " CPU device\n";
// else if((dt) & CL_DEVICE_TYPE_CPU)
// std::cerr << "#" << j << " GPU device\n";
// else std::cerr << "#" << j << " Other device (not yet implemented)\n";
if((b & CL_TRUE) && (status == CL_SUCCESS)) {
clDeviceInUse.push_back(false);
clDevices.push_back(deviceIds[j]);
}
clReleaseContext(context);
}
}
delete [] deviceIds;
delete [] platforms;
//std::cerr << "OpenCL platform detection - end \n";
// prepare per device parameters: context and command queue
for(std::vector<cl_device_id>::iterator iter=clDevices.begin(); iter < clDevices.end(); ++iter) {
cl_device_id dId = *iter;
oclParameter* oclParams = new oclParameter(dId);
assert(oclParams);
cl_int status;
oclParams->context = clCreateContext(NULL,1,&dId,NULL,NULL,&status);
cl_command_queue_properties prop = 0;
oclParams->commandQueue = clCreateCommandQueue(oclParams->context, dId, prop, &status);
dynamicParameters[dId]=oclParams;
}
}
public:
~clEnvironment() {
}
static inline clEnvironment * instance() {
while (!m_clEnvironment) {
//std::cerr << "clEnvironment instance\n";
pthread_mutex_lock(&instanceMutex);
if (!m_clEnvironment) {
m_clEnvironment = new clEnvironment();
//std::cerr << "clEnvironment instance\n";
}
assert(m_clEnvironment);
pthread_mutex_unlock(&instanceMutex);
}
return m_clEnvironment;
}
unsigned long getOCLID() { return ++oclId; }
/**
* allocate multiple GPU devices.
* Return a list of allocated GPU devices,
* picked from round-robin scan of the device list
*
* @param n is the number of GPU devices to be allocated
* @param preferred_dev is the logical-indexed starting device of the round-robin scan (ignored if <0)
* @param exclusive if true, do not consider devices already allocated
* @param identical TODO
* @return the vector of the logical-indexed allocated GPU devices.
* If allocation request cannot be fulfilled,
* an empty vector is returned
*/
std::vector<ssize_t> coAllocateGPUDeviceRR(size_t n=1, ssize_t preferred_dev=-1, bool exclusive=false, bool identical=false) {
cl_device_type dt;
size_t count = n;
std::vector<ssize_t> ret;
pthread_mutex_lock(&instanceMutex);
//start from either the user-defined preferred_dev or the last RR-allocated device
size_t dev = (preferred_dev>=0)? (preferred_dev%clDevices.size()): lastAssigned;
//perform multiple passes over the device list,
//stop if no allocation happens in one pass
size_t count_pre = count;
while (true) {
count_pre = count;
for (size_t i = 0; i < clDevices.size(); i++) {
clGetDeviceInfo(clDevices[dev], CL_DEVICE_TYPE,
sizeof(cl_device_type), &(dt), NULL);
if ((!clDeviceInUse[dev] | !exclusive) //dev is free or not exclusive mode
&& ((dt) & CL_DEVICE_TYPE_GPU)) { //dev is a GPU
ret.push_back(dev);
if (--count == 0)
break;
}
++dev;
dev %= clDevices.size();
} //end pass
if(!count) { // commit
// TODO check if identical
for (size_t i=0; i<ret.size();++i)
clDeviceInUse[ret[i]]=true;
lastAssigned=dev;
break;
}
if(count_pre == count) { // roll back
//std::cerr << "Not enough GPUs: aborting\n";
ret.clear();
break;
}
//continue to next pass
}
pthread_mutex_unlock(&instanceMutex);
return ret;
}
ssize_t getGPUDeviceRR(bool exclusive=false) {
std::vector<ssize_t> r = coAllocateGPUDeviceRR(1, false);
if (r.size()>0) return r[0];
else return -1;
}
ssize_t getGPUDevice() { return getGPUDeviceRR(); }
ssize_t getCPUDevice(bool exclusive=false) {
cl_device_type dt;
ssize_t ret=-1;
pthread_mutex_lock(&instanceMutex);
for(size_t i=0; i<clDevices.size(); i++) {
clGetDeviceInfo(clDevices[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &(dt), NULL);
if ((!clDeviceInUse[i] | !exclusive) && ((dt) & CL_DEVICE_TYPE_CPU)) {
clDeviceInUse[i]=true;
// char buf[128];
// clGetDeviceInfo(clDevices[i], CL_DEVICE_NAME, 128, buf, NULL);
// std::cerr << "clEnvironment: assigned CPU "<< i << " " << buf << "\n";
ret=i;
break;
}
}
pthread_mutex_unlock(&instanceMutex);
if (ret==-1) std::cerr << "CPU not available or in exclusive use: aborting\n";
return ret;
}
void releaseDevice(ssize_t id) {
std::cerr << "Not yet implemented\n";
}
std::vector<ssize_t> getAllGPUDevices() {
cl_device_type dt;
std::vector<ssize_t> ret;
for(size_t i=0; i<clDevices.size(); i++) {
clGetDeviceInfo(clDevices[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &(dt), NULL);
if((dt) & CL_DEVICE_TYPE_GPU)
ret.push_back(i);
}
return ret;
}
int getNumGPU() const { return numGPU; }
inline cl_device_id getDevice(size_t id) const { return clDevices[id]; }
oclParameter *getParameter(cl_device_id id) { return dynamicParameters[id]; }
std::vector<std::string> getDevicesInfo( ) {
std::vector<std::string> res;
//fprintf(stdout, "%d\n", numDevices);
for(size_t j = 0; j < clDevices.size(); j++) {
/*
char buf[128];
std::string s1, s2;
clGetDeviceInfo(clDevices[j], CL_DEVICE_NAME, 128, buf, NULL);
//fprintf(stdout, "Device %s supports ", buf);
s1 = std::string(buf);
clGetDeviceInfo(clDevices[j], CL_DEVICE_VERSION, 128, buf, NULL);
//fprintf(stdout, "%s\n", buf);
s2 = std::string(buf);
size_t max_workgroup_size = 0;
clGetDeviceInfo(clDevices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
&max_workgroup_size, NULL);
std::stringstream s3;
s3 << max_workgroup_size;
res.push_back(s1+" "+s2 + "MAX Work Group size " + s3.str());
*/
res.push_back(getDeviceInfo(clDevices[j]));
}
return res;
}
std::string getDeviceInfo(cl_device_id dev) {
char buf[128];
std::string s1, s2;
clGetDeviceInfo(dev, CL_DEVICE_NAME, 128, buf, NULL);
s1 = std::string(buf);
clGetDeviceInfo(dev, CL_DEVICE_VERSION, 128, buf, NULL);
s2 = std::string(buf);
size_t max_workgroup_size = 0;
clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
&max_workgroup_size, NULL);
std::stringstream s3;
s3 << max_workgroup_size;
std::string res;
res = s1+" "+s2 + "Max-WorkGroup-size " + s3.str();
return res;
}
private:
clEnvironment(clEnvironment const&){};
clEnvironment& operator=(clEnvironment const&){ return *this;};
private:
static clEnvironment * m_clEnvironment;
std::atomic_long oclId;
std::map<cl_device_id, oclParameter*> dynamicParameters;
std::vector<cl_device_id> clDevices;
std::vector<bool> clDeviceInUse;
size_t lastAssigned;
//std::vector<bool> clDEviceBusy;
int numGPU;
};
// Storage for the singleton pointer; it stays NULL until clEnvironment::instance() creates the object.
clEnvironment* clEnvironment::m_clEnvironment = NULL;
/*!
 * \brief Writes the symbolic name of an OpenCL status code, followed by a newline, to a stream.
 *
 * \param error the OpenCL status code to describe
 * \param out   the stream receiving the text (flushed through std::endl)
 *
 * Codes without a case below are reported as "Unknown OpenCL error <code>".
 */
static inline void printOCLErrorString(cl_int error, std::ostream & out) {
    // Resolve the code to its name first, then emit it from a single place.
    const char * label = NULL;
    switch (error) {
    case CL_SUCCESS:                                   label = "CL_SUCCESS"; break;
    case CL_DEVICE_NOT_FOUND:                          label = "CL_DEVICE_NOT_FOUND"; break;
    case CL_DEVICE_NOT_AVAILABLE:                      label = "CL_DEVICE_NOT_AVAILABLE"; break;
    case CL_COMPILER_NOT_AVAILABLE:                    label = "CL_COMPILER_NOT_AVAILABLE"; break;
    case CL_MEM_OBJECT_ALLOCATION_FAILURE:             label = "CL_MEM_OBJECT_ALLOCATION_FAILURE"; break;
    case CL_OUT_OF_RESOURCES:                          label = "CL_OUT_OF_RESOURCES"; break;
    case CL_OUT_OF_HOST_MEMORY:                        label = "CL_OUT_OF_HOST_MEMORY"; break;
    case CL_PROFILING_INFO_NOT_AVAILABLE:              label = "CL_PROFILING_INFO_NOT_AVAILABLE"; break;
    case CL_MEM_COPY_OVERLAP:                          label = "CL_MEM_COPY_OVERLAP"; break;
    case CL_IMAGE_FORMAT_MISMATCH:                     label = "CL_IMAGE_FORMAT_MISMATCH"; break;
    case CL_IMAGE_FORMAT_NOT_SUPPORTED:                label = "CL_IMAGE_FORMAT_NOT_SUPPORTED"; break;
    case CL_BUILD_PROGRAM_FAILURE:                     label = "CL_BUILD_PROGRAM_FAILURE"; break;
    case CL_MAP_FAILURE:                               label = "CL_MAP_FAILURE"; break;
    case CL_MISALIGNED_SUB_BUFFER_OFFSET:              label = "CL_MISALIGNED_SUB_BUFFER_OFFSET"; break;
    case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: label = "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; break;
    case CL_INVALID_VALUE:                             label = "CL_INVALID_VALUE"; break;
    case CL_INVALID_DEVICE_TYPE:                       label = "CL_INVALID_DEVICE_TYPE"; break;
    case CL_INVALID_PLATFORM:                          label = "CL_INVALID_PLATFORM"; break;
    case CL_INVALID_DEVICE:                            label = "CL_INVALID_DEVICE"; break;
    case CL_INVALID_CONTEXT:                           label = "CL_INVALID_CONTEXT"; break;
    case CL_INVALID_QUEUE_PROPERTIES:                  label = "CL_INVALID_QUEUE_PROPERTIES"; break;
    case CL_INVALID_COMMAND_QUEUE:                     label = "CL_INVALID_COMMAND_QUEUE"; break;
    case CL_INVALID_HOST_PTR:                          label = "CL_INVALID_HOST_PTR"; break;
    case CL_INVALID_MEM_OBJECT:                        label = "CL_INVALID_MEM_OBJECT"; break;
    case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:           label = "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; break;
    case CL_INVALID_IMAGE_SIZE:                        label = "CL_INVALID_IMAGE_SIZE"; break;
    case CL_INVALID_SAMPLER:                           label = "CL_INVALID_SAMPLER"; break;
    case CL_INVALID_BINARY:                            label = "CL_INVALID_BINARY"; break;
    case CL_INVALID_BUILD_OPTIONS:                     label = "CL_INVALID_BUILD_OPTIONS"; break;
    case CL_INVALID_PROGRAM:                           label = "CL_INVALID_PROGRAM"; break;
    case CL_INVALID_PROGRAM_EXECUTABLE:                label = "CL_INVALID_PROGRAM_EXECUTABLE"; break;
    case CL_INVALID_KERNEL_NAME:                       label = "CL_INVALID_KERNEL_NAME"; break;
    case CL_INVALID_KERNEL_DEFINITION:                 label = "CL_INVALID_KERNEL_DEFINITION"; break;
    case CL_INVALID_KERNEL:                            label = "CL_INVALID_KERNEL"; break;
    case CL_INVALID_ARG_INDEX:                         label = "CL_INVALID_ARG_INDEX"; break;
    case CL_INVALID_ARG_VALUE:                         label = "CL_INVALID_ARG_VALUE"; break;
    case CL_INVALID_ARG_SIZE:                          label = "CL_INVALID_ARG_SIZE"; break;
    case CL_INVALID_KERNEL_ARGS:                       label = "CL_INVALID_KERNEL_ARGS"; break;
    case CL_INVALID_WORK_DIMENSION:                    label = "CL_INVALID_WORK_DIMENSION"; break;
    case CL_INVALID_WORK_GROUP_SIZE:                   label = "CL_INVALID_WORK_GROUP_SIZE"; break;
    case CL_INVALID_WORK_ITEM_SIZE:                    label = "CL_INVALID_WORK_ITEM_SIZE"; break;
    case CL_INVALID_GLOBAL_OFFSET:                     label = "CL_INVALID_GLOBAL_OFFSET"; break;
    case CL_INVALID_EVENT_WAIT_LIST:                   label = "CL_INVALID_EVENT_WAIT_LIST"; break;
    case CL_INVALID_EVENT:                             label = "CL_INVALID_EVENT"; break;
    case CL_INVALID_OPERATION:                         label = "CL_INVALID_OPERATION"; break;
    case CL_INVALID_GL_OBJECT:                         label = "CL_INVALID_GL_OBJECT"; break;
    case CL_INVALID_BUFFER_SIZE:                       label = "CL_INVALID_BUFFER_SIZE"; break;
    case CL_INVALID_MIP_LEVEL:                         label = "CL_INVALID_MIP_LEVEL"; break;
    case CL_INVALID_GLOBAL_WORK_SIZE:                  label = "CL_INVALID_GLOBAL_WORK_SIZE"; break;
    case CL_INVALID_PROPERTY:                          label = "CL_INVALID_PROPERTY"; break;
    default:
        break; // no known name: handled below
    }

    if (label != NULL) {
        out << label << std::endl;
    } else {
        out << "Unknown OpenCL error " << error << std::endl;
    }
}
/*!
 * \brief Tests an OpenCL status code and reports failures on std::cerr.
 *
 * \param s   status code returned by an OpenCL call
 * \param msg text printed, followed by ':', in front of the error name
 * \return true if \p s is CL_SUCCESS, false otherwise
 */
static inline bool checkResult(cl_int s, const char* msg) {
    const bool succeeded = (s == CL_SUCCESS);
    if (!succeeded) {
        // prefix, then the symbolic name of the status code
        std::cerr << msg << ":";
        printOCLErrorString(s,std::cerr);
    }
    return succeeded;
}
} // namespace
#else
namespace ff {
/*!
 * \class clEnvironment
 *
 * \brief Stand-in compiled when FF_OPENCL is not defined.
 *
 * It cannot be constructed from outside (private constructor) and
 * instance() always yields NULL.
 */
class clEnvironment {
    // members of a class are private by default: keeps the constructor inaccessible
    clEnvironment() {}

public:
    //! Always returns NULL.
    static inline clEnvironment * instance() {
        return NULL;
    }
};
} // namespace
#endif /* FF_OPENCL */
#endif /* FF_OCLENVIRONMENT_HPP */