/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* *************************************************************************** * * FastFlow is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License version 3 as * published by the Free Software Foundation. * Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3 * or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT) * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * **************************************************************************** */ #ifndef FF_STENCIL_HPP #define FF_STENCIL_HPP #include #include #include #include #include #include namespace ff { template class stencilTask { public: typedef T base_type; stencilTask():InTask(NULL),OutTask(NULL),Xsize(0),Ysize(0),Zsize(0), Xstart(0),Xstop(0),Xstep(0), Ystart(0),Ystop(0),Ystep(0), Zstart(0),Zstop(0),Zstep(0), Youtsize(0) {} stencilTask(T *t, size_t Xsize, size_t Xstart, size_t Xstop, size_t Xstep): InTask(t),Xsize(Xsize),Ysize(1),Zsize(1), Xstart(Xstart),Xstop(Xstop),Xstep(Xstep), Ystart(0),Ystop(0),Ystep(0), Zstart(0),Zstop(0),Zstep(0), Youtsize(0) {} stencilTask(T *t, size_t Xsize, size_t Xstart, size_t Xstop, size_t Xstep, size_t Ysize, size_t Ystart, size_t Ystop, size_t Ystep): InTask(t),Xsize(Xsize),Ysize(Ysize),Zsize(1), Xstart(Xstart),Xstop(Xstop),Xstep(Xstep), Ystart(Ystart),Ystop(Ystop),Ystep(Ystep), Zstart(0),Zstop(1),Zstep(0), Youtsize(0) {} stencilTask(T *t, size_t Xsize, size_t Xstart, size_t Xstop, size_t Xstep, size_t Ysize, size_t Ystart, size_t Ystop, size_t Ystep, size_t Zsize, size_t Zstart, size_t Zstop, size_t Zstep): InTask(t),Xsize(Xsize),Ysize(Ysize),Zsize(Zsize), Xstart(Xstart),Xstop(Xstop),Xstep(Xstep), Ystart(Ystart),Ystop(Ystop),Ystep(Ystep), Zstart(Zstart),Zstop(Zstop),Zstep(Zstep), Youtsize(0) {} stencilTask& operator=(const stencilTask &t) { InTask = t.getInPtr(); OutTask=t.getOutPtr(); Xsize=t.X_size(); Ysize=t.Y_size(); Zsize=t.Z_size(); Xstart=t.X_start(); Xstop=t.X_stop(); Xstep=t.X_step(); Ystart=t.Y_start(); Ystop=t.Y_stop(); Ystep=t.Y_step(); Zstart=t.Z_start(); Zstop=t.Z_stop(); Zstep=t.Z_step(); Youtsize=t.Y_osize(); return *this; } inline size_t X_start() const { return Xstart; } inline size_t X_stop() const { return Xstop; } inline size_t X_step() const { return Xstep; } inline size_t Y_start() const { return Ystart; } inline size_t Y_stop() const { return Ystop; } inline size_t Y_step() const { return Ystep; } inline size_t Z_start() const { return Zstart; } inline size_t Z_stop() const { return Zstop; } inline size_t Z_step() const { return Zstep; } inline size_t X_size() const { return Xsize; } inline size_t Y_size() const { return Ysize; } inline size_t Z_size() const { return Zsize; } inline size_t Y_osize() const { return Youtsize; } inline size_t size() const { return X_size()*Y_size()*Z_size(); } inline size_t bytesize() const { return size()*sizeof(T); } void setX(size_t x1, size_t x2, size_t x3) { Xstart = x1; Xstop = x2; Xstep = x3; } void setY(size_t y1, size_t y2, size_t y3) { Ystart = y1; Ystop = y2; Ystep = y3; } void setZ(size_t z1, size_t z2, size_t z3) { Zstart = z1; Zstop = z2; Zstep = z3; } void setInTask(T *t, size_t X) { if (t) InTask=t; Xsize=X; Ysize=1; Zsize=1; } void setInTask(T *t, size_t X, size_t Y) { if (t) InTask=t; Xsize=X; Ysize=Y; Zsize=1; } void setInTask(T *t, size_t X, size_t Y, size_t Z) { if (t) InTask=t; Xsize=X; Ysize=Y; Zsize=Z; } void setOutTask(T *t, size_t Yout) { if (t) OutTask=t; Youtsize=Yout; } T* getInPtr() const { return InTask;} T* getOutPtr() const { return OutTask;} inline void swap() { T *tmp=InTask; InTask=OutTask; OutTask=tmp;} protected: T *InTask; T *OutTask; size_t Xsize,Ysize,Zsize; size_t Xstart,Xstop,Xstep; size_t Ystart,Ystop,Ystep; size_t Zstart,Zstop,Zstep; size_t Youtsize; }; template class stencil2D: public ff_node { public: typedef ParallelForReduce parloop_t; private: typedef std::function init_F_t; typedef std::function init2_F_t; typedef std::function compute_F_t; typedef std::function reduce_F_t; typedef std::function reduce2_F_t; typedef std::function prepost_F_t; typedef std::function reduceOp_F_t; typedef std::function iterCond_F_t; enum { DEFAULT_STENCIL_CHUNKSIZE = 8 }; static void reduceOpDefault(T&, T) {} public: stencil2D(T *Min, T *Mout, const size_t Xsize, const size_t Ysize, const size_t Youtsize, int nw, int Yradius=1,int Xradius=1,bool ghostcells=false, const size_t chunksize=DEFAULT_STENCIL_CHUNKSIZE): oneShot(true), ghosts(ghostcells),nw(nw), Yradius(Yradius), Xradius(Xradius), chunkSize(chunksize), extraInitInParam(NULL),extraInitOutParam(NULL), initInF1(NULL), initOutF1(NULL),initInF2(NULL),initOutF2(NULL), beforeFor(NULL), computeF(NULL), computeFReduce1(NULL), computeFReduce2(NULL), afterFor(NULL), reduceOp(reduceOpDefault), iterCondition(NULL), identityValue((T)0), reduceVar((T)0), iter(0), maxIter(1) { Task.setInTask(Min, Xsize, Ysize); // TODO assert(Ysize==Youtsize); Task.setOutTask(Mout, Youtsize); skipfirstpop(); } stencil2D(int nw, int Yradius=1, int Xradius=1,bool ghostcells=false, const size_t chunksize=DEFAULT_STENCIL_CHUNKSIZE): oneShot(false), ghosts(ghostcells),nw(nw),Yradius(Yradius),Xradius(Xradius), chunkSize(chunksize), extraInitInParam(NULL),extraInitOutParam(NULL), initInF1(NULL), initOutF1(NULL),initInF2(NULL),initOutF2(NULL), beforeFor(NULL), computeF(NULL), computeFReduce1(NULL), computeFReduce2(NULL), afterFor(NULL), reduceOp(reduceOpDefault), iterCondition(NULL), identityValue((T)0), reduceVar((T)0), iter(0), maxIter(1),ploop(nw,true) { } ~stencil2D() {} void initInFunc(init_F_t F, void *extra) { if (!oneShot) { error("stencil2D: initInFunc: the provided init function will not be called in this configuration\n"); } initInF1 = F; extraInitInParam = extra; } void initInFuncAll(init2_F_t F) { if (!oneShot) { error("stencil2D: initInFunc: the provided init function will not be called in this configuration\n"); } initInF2 = F; } void initOutFunc(init_F_t F, void *extra) { if (!oneShot) { error("stencil2D: initOutFunc: the provided init function will not be called in this configuration\n"); } initOutF1 = F; extraInitOutParam = extra; } void initOutFuncAll(init2_F_t F) { if (!oneShot) { error("stencil2D: initOutFunc: the provided init function will not be called in this configuration\n"); } initOutF2 = F; } void preFunc(prepost_F_t F) { beforeFor = F; } void computeFunc(reduce_F_t F, size_t xstart=0, size_t xstop=0, size_t xstep=1, size_t ystart=0, size_t ystop=0, size_t ystep=1, size_t zstart=0, size_t zstop=0, size_t zstep=1) { Task.setX(xstart,xstop?xstop:Task.X_size(),xstep); Task.setY(ystart,ystop?ystop:Task.Y_size(),ystep); Task.setZ(zstart,zstop?zstop:Task.Z_size(),zstep); computeFReduce1 = F; } void computeFuncAll(reduce2_F_t F, size_t xstart=0, size_t xstop=0, size_t xstep=1, size_t ystart=0, size_t ystop=0, size_t ystep=1, size_t zstart=0, size_t zstop=0, size_t zstep=1) { Task.setX(xstart,xstop?xstop:Task.X_size(),xstep); Task.setY(ystart,ystop?ystop:Task.Y_size(),ystep); Task.setZ(zstart,zstop?zstop:Task.Z_size(),zstep); computeFReduce2 = F; } void postFunc(prepost_F_t F) { afterFor = F; } void reduceFunc(iterCond_F_t I, size_t maxI, reduceOp_F_t R, T iV) { iterCondition = I; maxIter = maxI; reduceOp = R; identityValue=iV; } // swaps input and output matrices inline void swap() { Task.swap(); } inline void *svc(void *task) { if (task != NULL) Task = *((stencilTask*)task); T *Min = Task.getInPtr(); T *Mout = Task.getOutPtr(); if (oneShot && (initInF1 || initInF2)) { const size_t& Xsize = Task.X_size(); const size_t& Ysize = Task.Y_size(); if (initInF1) { ploop.parallel_for(0,Xsize,1,chunkSize,[&](const long i) { for(long j=0; j< Ysize; ++j) Min[i*Xsize+j] = initInF1(i,j, extraInitInParam); }, nw); } else { initInF2(ploop, Min, Xsize, Ysize); } } if (oneShot && (initOutF1 || initOutF2)) { const size_t& Xsize = Task.X_size(); const size_t& Ysize = Task.Y_size(); if (initOutF1) { ploop.parallel_for(0,Xsize,1,chunkSize, [&](const long i) { for(long j=0; j< Ysize; ++j) Mout[i*Xsize+j] = initOutF1(i,j, extraInitOutParam); }, nw); } else { initOutF2(ploop, Mout, Xsize, Ysize); } } { const size_t& Xsize = Task.X_size(); const size_t& Ysize = Task.Y_size(); const size_t& Xstart = Task.X_start(); const size_t& Xstop = Task.X_stop(); const size_t& Xstep = Task.X_step(); const size_t& Ystart = Task.Y_start(); const size_t& Ystop = Task.Y_stop(); const size_t& Ystep = Task.Y_step(); T rVar = reduceVar; rVar = identityValue; iter = 0; swap(); // because of the next swap op if (computeFReduce1) { do { swap(); Min = Task.getInPtr(); Mout = Task.getOutPtr(); if (beforeFor) beforeFor(Min,Xsize, Ysize, rVar); ploop.parallel_reduce(rVar,identityValue, Xstart,Xstop, Xstep, chunkSize, [&](const long i,T &rVar) { for(long j=Ystart; j< Ystop; j+=Ystep) { Mout[i*Xsize+j] = computeFReduce1(i,j,Min,Xsize,Ysize,rVar); } }, reduceOp, nw); if (afterFor) afterFor(Mout,Xsize, Ysize, rVar); } while(++iter Task; parloop_t ploop; }; } // namespace #endif /* FF_STENCIL_HPP */