mesytec-mnode/external/taskflow-3.8.0/3rd-party/ff/oclnode.hpp

181 lines
6.2 KiB
C++
Raw Normal View History

2025-01-04 01:25:05 +01:00
/* -*- Mode: C++; tab-width: 2; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*!
* \file oclnode.hpp
* \ingroup building_blocks
*
* \brief FastFlow OpenCL interface node
*
* This class bridges multicore with GPGPUs using OpenCL
*
*/
/* ***************************************************************************
*
* FastFlow is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 3 as
* published by the Free Software Foundation.
* Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
* or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
****************************************************************************
*/
/* Mehdi Goli: m.goli@rgu.ac.uk goli.mehdi@gmail.com
* Massimo Torquati: torquati@di.unipi.it
* Marco Aldinucci
*
*/
#ifndef FF_OCLNODE_HPP
#define FF_OCLNODE_HPP
#include <ff/ocl/clEnvironment.hpp>
#include <ff/node.hpp>
#include <vector>
namespace ff{
/*!
* \class ff_oclNode
* \ingroup buiding_blocks
*
* \brief OpenCL specialisation of the ff_node class
*
* Implements the node that is serving as OpenCL device. In general there is one ff_oclNode
* per OpenCL device. Anyway, there is a command queue per device. Concurrency for accessing
* the command queue from different ff_oclNode is managed by FF.
*
*/
class ff_oclNode : public ff_node {
public:
/* cl_device_type - bitfield
#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
#define CL_DEVICE_TYPE_CPU (1 << 1)
#define CL_DEVICE_TYPE_GPU (1 << 2)
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
*/
// returns the kind of node
virtual fftype getFFType() const { return OCL_WORKER; }
void setDeviceId(cl_device_id id) { deviceId = id; }
void setDeviceType(cl_device_type dt = CL_DEVICE_TYPE_ALL) { dtype = dt; }
cl_device_id getDeviceId() const { return deviceId; }
cl_device_type getDeviceType() const {return dtype;}
int getOCLID() const { return oclId; }
protected:
/**
* \brief Constructor
*
* It construct the OpenCL node for the device.
*
*/
ff_oclNode():oclId(-1),deviceId(NULL), dtype(CL_DEVICE_TYPE_ALL) {
clEnvironment::instance();
};
~ff_oclNode() { }
int svc_init() {
if (oclId < 0) oclId = clEnvironment::instance()->getOCLID();
// the user has set a specific device
if (deviceId != NULL) return 0;
switch (dtype) {
case CL_DEVICE_TYPE_ALL: {
// no user choice, a static greedy algorithm is used to allocate openCL components
ssize_t GPUdevId =clEnvironment::instance()->getGPUDeviceRR();
if( (GPUdevId !=-1) && ( oclId < clEnvironment::instance()->getNumGPU())) {
printf("%d: Allocated a GPU device, the id is %ld\n", oclId, GPUdevId);
deviceId=clEnvironment::instance()->getDevice(GPUdevId);
return 0;
}
// fall back to CPU either GPU has reached its max or there is no GPU available
ssize_t CPUdevId =clEnvironment::instance()->getCPUDevice();
if (CPUdevId != -1) {
printf("%d: Allocated a CPU device as either no GPU device is available or no GPU slot is available (cpuId=%ld)\n",oclId, CPUdevId);
deviceId=clEnvironment::instance()->getDevice(CPUdevId);
return 0;
}
printf("%d: cannot allocate neither a GPU nor a CPU device\n", oclId);
return -1;
} break;
case CL_DEVICE_TYPE_GPU: {
ssize_t GPUdevId =clEnvironment::instance()->getGPUDeviceRR();
if( (GPUdevId !=-1) && ( oclId < clEnvironment::instance()->getNumGPU())) {
printf("%d: Allocated a GPU device, the id is %ld\n", oclId, GPUdevId);
deviceId=clEnvironment::instance()->getDevice(GPUdevId);
return 0;
}
printf("%d: cannot allocate a GPU device\n", oclId);
return -1;
} break;
case CL_DEVICE_TYPE_CPU: {
ssize_t CPUdevId =clEnvironment::instance()->getCPUDevice();
if (CPUdevId != -1) {
printf("%d: Allocated a CPU device (cpuId=%ld)\n",oclId, CPUdevId);
deviceId=clEnvironment::instance()->getDevice(CPUdevId);
return 0;
}
printf("%d: cannot allocate a CPU device\n", oclId);
return -1;
} break;
default : std::cerr << "Unknown/not supported device type\n";
return -1;
}
return 0;
}
void svc_end() {}
protected:
int oclId; // the OpenCL node id
cl_device_id deviceId; // is the id which is provided for user
cl_device_type dtype;
};
/*!
* \class ff_oclNode_t
* \ingroup buiding_blocks
*
* \brief OpenCL specialisation of the ff_node class (typed)
*
*
*/
template<typename IN, typename OUT=IN>
struct ff_oclNode_t: ff_oclNode {
typedef IN in_type;
typedef OUT out_type;
ff_oclNode_t():
GO_ON((OUT*)FF_GO_ON),
EOS((OUT*)FF_EOS),
GO_OUT((OUT*)FF_GO_OUT),
EOS_NOFREEZE((OUT*)FF_EOS_NOFREEZE) {}
OUT *GO_ON, *EOS, *GO_OUT, *EOS_NOFREEZE;
virtual ~ff_oclNode_t() {}
virtual OUT* svc(IN*)=0;
inline void *svc(void *task) { return svc(reinterpret_cast<IN*>(task));};
};
}
#endif /* FF_OCLNODE_HPP */