/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*!
 *  \file clEnvironment.hpp
 *  \ingroup aux_classes
 *
 *  \brief This file includes the basic support for OpenCL platforms
 *
 *  Realises a singleton class that keeps the status of the OpenCL platform
 *  and creates contexts, command queues etc.
 */
/* ***************************************************************************
 *
 *  FastFlow is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License version 3 as
 *  published by the Free Software Foundation.
 *  Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
 *  or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 *  License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* ****************************************************************************
 */

/*
 *   Mehdi Goli:       m.goli@rgu.ac.uk    goli.mehdi@gmail.com
 *   Massimo Torquati: torquati@di.unipi.it
 *   Marco Aldinucci:  aldinuc@di.unito.it
 *
 */

#ifndef FF_OCLENVIRONMENT_HPP
#define FF_OCLENVIRONMENT_HPP

// Everything up to the matching #else is compiled only when FF_OPENCL is defined.
#if defined(FF_OPENCL)

// to avoid deprecated warnings
#if !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS 1
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS 1
#endif

// NOTE(review): the header name of every #include below is missing from this
// copy of the file; the directives are reproduced as found and must be
// restored before the file can be built.
#ifdef __APPLE__
#include
#else
#include
#endif

#include
#include
#include
#include
#include  // FIX: check if it is possible to remove this include
#include
#include
#include
//#include
#include

namespace ff {

// Mutex locked by clEnvironment::instance(), coAllocateGPUDeviceRR() and
// getCPUDevice() below.
// NOTE(review): it is declared `static` at namespace scope in a header, so
// presumably each translation unit that includes this file gets its own
// mutex object - confirm that this is intended.
static pthread_mutex_t instanceMutex = PTHREAD_MUTEX_INITIALIZER;

// Per-device record: the device id together with a context and a command queue.
// The constructor stores only the device id; `context` and `commandQueue` are
// left for the caller to fill in.
struct oclParameter {
    oclParameter(cl_device_id d_id):d_id(d_id){}
    cl_device_id d_id;
    cl_context context;
    cl_command_queue commandQueue;
};

/*!
 * \class clEnvironment
 * \ingroup aux_classes
 *
 * \brief OpenCL platform inspection and setup
 *
 * \note Multiple platforms are not managed. Platforms[0] is always adopted. Support for multiple
 * platforms will be implemented if needed.
 *
 */
class clEnvironment {
private:
    cl_platform_id *platforms;   // array of numPlatforms ids, allocated with new[] in the constructor
    cl_uint numPlatforms;
    cl_uint numDevices;          // overwritten for each platform scanned by the constructor
    //cl_device_id* devlist_for_platform;
    cl_device_id* deviceIds;     // re-allocated with new[] for each platform scanned by the constructor
protected:
    /**
     * Queries the available OpenCL platforms and their devices, then creates
     * one context and one command queue per entry of clDevices.
     *
     * Return codes of the OpenCL calls are not inspected here; the only checks
     * are the assert() calls.
     */
    clEnvironment():
        platforms(NULL), numPlatforms(0),lastAssigned(0) {
        oclId=0;

        // FIX: what is this ???
#if defined(FF_GPUCOMPONETS)
        numGPU=FF_GPUCOMPONETS;
#else
        numGPU=10000;
#endif
        // First call only asks how many platforms exist, second call fetches their ids.
        clGetPlatformIDs(0, NULL, &numPlatforms);
        assert(numPlatforms>0);
        platforms = new cl_platform_id[numPlatforms];
        assert(platforms);
        clGetPlatformIDs(numPlatforms, platforms, NULL);

#ifdef FF_OPENCL_LOG
        if (numPlatforms>1) {
            printf("Multiple OpenCL platforms detected. Experimental code\n");
        }
#endif

        for (unsigned int i = 0; i< numPlatforms; ++i) {
            // Same two-step pattern for the devices of platform i: count, then fetch.
            clGetDeviceIDs(platforms[i],CL_DEVICE_TYPE_ALL,0,NULL,&(numDevices));
            // NOTE(review): deviceIds is assigned a fresh array on every iteration and
            // no delete[] is visible in this copy - confirm whether the previous array leaks.
            deviceIds = new cl_device_id[numDevices];
            assert(deviceIds);
            // Fill in CLDevice with clGetDeviceIDs()
            clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL,numDevices,deviceIds,NULL);

            //std::cerr << "OpenCL platform detection - begin\n";
            // NOTE(review): the text between `j` and `::iterator` is missing from this
            // copy (presumably the rest of a per-device loop and the start of the loop
            // over clDevices); the line is reproduced as found and does not compile.
            for(size_t j=0; j::iterator iter=clDevices.begin(); iter < clDevices.end(); ++iter) {
            // For the device referenced by iter: allocate its oclParameter record,
            // create a context for that single device and a command queue with
            // properties 0, then register the record under the device id.
            // `status` is written by both create calls but never read.
            cl_device_id dId = *iter;
            oclParameter* oclParams = new oclParameter(dId);
            assert(oclParams);
            cl_int status;
            oclParams->context = clCreateContext(NULL,1,&dId,NULL,NULL,&status);
            cl_command_queue_properties prop = 0;
            oclParams->commandQueue = clCreateCommandQueue(oclParams->context, dId, prop, &status);
            dynamicParameters[dId]=oclParams;
        }
    }

public:
    // Empty: nothing allocated by the constructor is released here.
    ~clEnvironment() {
    }

    /**
     * Returns the single clEnvironment object, creating it on first use.
     *
     * The pointer is tested, instanceMutex is taken, and the pointer is tested
     * again before the object is created.
     *
     * NOTE(review): m_clEnvironment is a plain pointer and is read outside the
     * lock - confirm whether concurrent first calls are adequately synchronised.
     */
    static inline clEnvironment * instance() {
        while (!m_clEnvironment) {
            //std::cerr << "clEnvironment instance\n";
            pthread_mutex_lock(&instanceMutex);
            if (!m_clEnvironment) {
                m_clEnvironment = new clEnvironment();
                //std::cerr << "clEnvironment instance\n";
            }
            assert(m_clEnvironment);
            pthread_mutex_unlock(&instanceMutex);
        }
        return m_clEnvironment;
    }

    // Increments the oclId counter and returns the new value.
    unsigned long getOCLID() { return ++oclId; }

    /**
     * allocate multiple GPU devices.
     * Return a list of allocated GPU devices,
     * picked from round-robin scan of the device list
     *
     * @param n is the number of GPU devices to be allocated
     * @param preferred_dev is the logical-indexed starting device of the round-robin scan (ignored if <0)
     * @param exclusive if true, do not consider devices already allocated
     * @param identical TODO
     * @return the vector of the logical-indexed allocated GPU devices.
     * If allocation request cannot be fulfilled,
     * an empty vector is returned
     */
    // NOTE(review): the template arguments of the std::vector return type and of
    // `ret` are missing from this copy, and so is the end of this function.
    std::vector coAllocateGPUDeviceRR(size_t n=1, ssize_t preferred_dev=-1, bool exclusive=false, bool identical=false) {
        cl_device_type dt;
        size_t count = n;     // devices still to be found
        std::vector ret;
        pthread_mutex_lock(&instanceMutex);
        //start from either the user-defined preferred_dev or the last RR-allocated device
        // NOTE(review): the modulo uses clDevices.size() as divisor - confirm the
        // device list can never be empty when preferred_dev >= 0.
        size_t dev = (preferred_dev>=0)? (preferred_dev%clDevices.size()): lastAssigned;
        //perform multiple passes over the device list,
        //stop if no allocation happens in one pass
        size_t count_pre = count;
        while (true) {
            count_pre = count;
            for (size_t i = 0; i < clDevices.size(); i++) {
                clGetDeviceInfo(clDevices[dev], CL_DEVICE_TYPE, sizeof(cl_device_type), &(dt), NULL);
                // Note the bitwise `|` between the two negated flags (not `||`).
                if ((!clDeviceInUse[dev] | !exclusive) //dev is free or not exclusive mode
                    && ((dt) & CL_DEVICE_TYPE_GPU)) { //dev is a GPU
                    ret.push_back(dev);
                    if (--count == 0)
                        break;
                }
                // advance to the next device, wrapping around at the end of the list
                ++dev;
                dev %= clDevices.size();
            } //end pass
            if(!count) {
                // commit
                // TODO check if identical
                // NOTE(review): everything between `i` and `r = ...` is missing from this
                // copy: the rest of coAllocateGPUDeviceRR() and the beginning of the next
                // method (getGPUDeviceRR(), judging by the call in getGPUDevice() below).
                // The line is reproduced as found and does not compile.
                for (size_t i=0; i r = coAllocateGPUDeviceRR(1, false);
        // NOTE(review): per the signature above, the second argument `false` binds to
        // `preferred_dev` (as 0), not to `exclusive` - confirm this is what was meant.
        if (r.size()>0) return r[0];
        else return -1;
    }

    // Forwards to getGPUDeviceRR().
    ssize_t getGPUDevice() { return getGPUDeviceRR(); }

    // NOTE(review): the body of getCPUDevice() after `i` and the return type of
    // getAllGPUDevices() are missing from this copy; reproduced as found.
    ssize_t getCPUDevice(bool exclusive=false) {
        cl_device_type dt;
        ssize_t ret=-1;
        pthread_mutex_lock(&instanceMutex);
        for(size_t i=0; i getAllGPUDevices() {
        cl_device_type dt;
        std::vector ret;
        // NOTE(review): the body of getAllGPUDevices() after `i` and the return type of
        // getDevicesInfo() are missing from this copy; reproduced as found.
        for(size_t i=0; i getDevicesInfo( ) {
        // Collects one getDeviceInfo() string per entry of clDevices.
        // NOTE(review): the template argument of `res` is missing from this copy.
        std::vector res;
        //fprintf(stdout, "%d\n", numDevices);
        for(size_t j = 0; j < clDevices.size(); j++) {
            /*
              Earlier inline implementation, kept disabled:

              char buf[128];
              std::string s1, s2;
              clGetDeviceInfo(clDevices[j], CL_DEVICE_NAME, 128, buf, NULL);
              //fprintf(stdout, "Device %s supports ", buf);
              s1 = std::string(buf);
              clGetDeviceInfo(clDevices[j], CL_DEVICE_VERSION, 128, buf, NULL);
              //fprintf(stdout, "%s\n", buf);
              s2 = std::string(buf);
              size_t max_workgroup_size = 0;
              clGetDeviceInfo(clDevices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, NULL);
              std::stringstream s3;
              s3 << max_workgroup_size;
              res.push_back(s1+" "+s2 + "MAX 
Work Group size " + s3.str()); */ res.push_back(getDeviceInfo(clDevices[j])); } return res; } std::string getDeviceInfo(cl_device_id dev) { char buf[128]; std::string s1, s2; clGetDeviceInfo(dev, CL_DEVICE_NAME, 128, buf, NULL); s1 = std::string(buf); clGetDeviceInfo(dev, CL_DEVICE_VERSION, 128, buf, NULL); s2 = std::string(buf); size_t max_workgroup_size = 0; clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_workgroup_size, NULL); std::stringstream s3; s3 << max_workgroup_size; std::string res; res = s1+" "+s2 + "Max-WorkGroup-size " + s3.str(); return res; } private: clEnvironment(clEnvironment const&){}; clEnvironment& operator=(clEnvironment const&){ return *this;}; private: static clEnvironment * m_clEnvironment; std::atomic_long oclId; std::map dynamicParameters; std::vector clDevices; std::vector clDeviceInUse; size_t lastAssigned; //std::vector clDEviceBusy; int numGPU; }; clEnvironment* clEnvironment::m_clEnvironment = NULL; static inline void printOCLErrorString(cl_int error, std::ostream & out) { switch (error) { case CL_SUCCESS: out << "CL_SUCCESS" << std::endl; break; case CL_DEVICE_NOT_FOUND: out << "CL_DEVICE_NOT_FOUND" << std::endl; break; case CL_DEVICE_NOT_AVAILABLE: out << "CL_DEVICE_NOT_AVAILABLE" << std::endl; break; case CL_COMPILER_NOT_AVAILABLE: out << "CL_COMPILER_NOT_AVAILABLE" << std::endl; break; case CL_MEM_OBJECT_ALLOCATION_FAILURE: out << "CL_MEM_OBJECT_ALLOCATION_FAILURE" << std::endl; break; case CL_OUT_OF_RESOURCES: out << "CL_OUT_OF_RESOURCES" << std::endl; break; case CL_OUT_OF_HOST_MEMORY: out << "CL_OUT_OF_HOST_MEMORY" << std::endl; break; case CL_PROFILING_INFO_NOT_AVAILABLE: out << "CL_PROFILING_INFO_NOT_AVAILABLE" << std::endl; break; case CL_MEM_COPY_OVERLAP: out << "CL_MEM_COPY_OVERLAP" << std::endl; break; case CL_IMAGE_FORMAT_MISMATCH: out << "CL_IMAGE_FORMAT_MISMATCH" << std::endl; break; case CL_IMAGE_FORMAT_NOT_SUPPORTED: out << "CL_IMAGE_FORMAT_NOT_SUPPORTED" << std::endl; break; case 
CL_BUILD_PROGRAM_FAILURE: out << "CL_BUILD_PROGRAM_FAILURE" << std::endl; break; case CL_MAP_FAILURE: out << "CL_MAP_FAILURE" << std::endl; break; case CL_MISALIGNED_SUB_BUFFER_OFFSET: out << "CL_MISALIGNED_SUB_BUFFER_OFFSET" << std::endl; break; case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: out << "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST" << std::endl; break; case CL_INVALID_VALUE: out << "CL_INVALID_VALUE" << std::endl; break; case CL_INVALID_DEVICE_TYPE: out << "CL_INVALID_DEVICE_TYPE" << std::endl; break; case CL_INVALID_PLATFORM: out << "CL_INVALID_PLATFORM" << std::endl; break; case CL_INVALID_DEVICE: out << "CL_INVALID_DEVICE" << std::endl; break; case CL_INVALID_CONTEXT: out << "CL_INVALID_CONTEXT" << std::endl; break; case CL_INVALID_QUEUE_PROPERTIES: out << "CL_INVALID_QUEUE_PROPERTIES" << std::endl; break; case CL_INVALID_COMMAND_QUEUE: out << "CL_INVALID_COMMAND_QUEUE" << std::endl; break; case CL_INVALID_HOST_PTR: out << "CL_INVALID_HOST_PTR" << std::endl; break; case CL_INVALID_MEM_OBJECT: out << "CL_INVALID_MEM_OBJECT" << std::endl; break; case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: out << "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" << std::endl; break; case CL_INVALID_IMAGE_SIZE: out << "CL_INVALID_IMAGE_SIZE" << std::endl; break; case CL_INVALID_SAMPLER: out << "CL_INVALID_SAMPLER" << std::endl; break; case CL_INVALID_BINARY: out << "CL_INVALID_BINARY" << std::endl; break; case CL_INVALID_BUILD_OPTIONS: out << "CL_INVALID_BUILD_OPTIONS" << std::endl; break; case CL_INVALID_PROGRAM: out << "CL_INVALID_PROGRAM" << std::endl; break; case CL_INVALID_PROGRAM_EXECUTABLE: out << "CL_INVALID_PROGRAM_EXECUTABLE" << std::endl; break; case CL_INVALID_KERNEL_NAME: out << "CL_INVALID_KERNEL_NAME" << std::endl; break; case CL_INVALID_KERNEL_DEFINITION: out << "CL_INVALID_KERNEL_DEFINITION" << std::endl; break; case CL_INVALID_KERNEL: out << "CL_INVALID_KERNEL" << std::endl; break; case CL_INVALID_ARG_INDEX: out << "CL_INVALID_ARG_INDEX" << std::endl; break; case 
CL_INVALID_ARG_VALUE: out << "CL_INVALID_ARG_VALUE" << std::endl; break; case CL_INVALID_ARG_SIZE: out << "CL_INVALID_ARG_SIZE" << std::endl; break; case CL_INVALID_KERNEL_ARGS: out << "CL_INVALID_KERNEL_ARGS" << std::endl; break; case CL_INVALID_WORK_DIMENSION: out << "CL_INVALID_WORK_DIMENSION" << std::endl; break; case CL_INVALID_WORK_GROUP_SIZE: out << "CL_INVALID_WORK_GROUP_SIZE" << std::endl; break; case CL_INVALID_WORK_ITEM_SIZE: out << "CL_INVALID_WORK_ITEM_SIZE" << std::endl; break; case CL_INVALID_GLOBAL_OFFSET: out << "CL_INVALID_GLOBAL_OFFSET" << std::endl; break; case CL_INVALID_EVENT_WAIT_LIST: out << "CL_INVALID_EVENT_WAIT_LIST" << std::endl; break; case CL_INVALID_EVENT: out << "CL_INVALID_EVENT" << std::endl; break; case CL_INVALID_OPERATION: out << "CL_INVALID_OPERATION" << std::endl; break; case CL_INVALID_GL_OBJECT: out << "CL_INVALID_GL_OBJECT" << std::endl; break; case CL_INVALID_BUFFER_SIZE: out << "CL_INVALID_BUFFER_SIZE" << std::endl; break; case CL_INVALID_MIP_LEVEL: out << "CL_INVALID_MIP_LEVEL" << std::endl; break; case CL_INVALID_GLOBAL_WORK_SIZE: out << "CL_INVALID_GLOBAL_WORK_SIZE" << std::endl; break; case CL_INVALID_PROPERTY: out << "CL_INVALID_PROPERTY" << std::endl; break; default: out << "Unknown OpenCL error " << error << std::endl; } } static inline bool checkResult(cl_int s, const char* msg) { if(s != CL_SUCCESS) { std::cerr << msg << ":"; printOCLErrorString(s,std::cerr); return (false); // Not Ok } return (true); // Ok } } // namespace #else namespace ff { class clEnvironment{ private: clEnvironment() {} public: static inline clEnvironment * instance() { return NULL; } }; } // namespace #endif /* FASTFLOW_OPENCL */ #endif /* FF_OCLENVIRONMENT_HPP */