/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /*! * \link * \file node.hpp * \ingroup building_blocks * * \brief FastFlow ff_node * * @detail FastFlow basic contanier for a shared-memory parallel activity * */ #ifndef FF_NODE_HPP #define FF_NODE_HPP /* *************************************************************************** * * FastFlow is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License version 3 as * published by the Free Software Foundation. * Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3 * or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT) * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * **************************************************************************** */ #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DFF_ENABLED #include #include #include #include #include #endif namespace ff { // distributed rts related type, but always defined struct GroupInterface; static void* FF_EOS = (void*)(ULLONG_MAX); /// automatically propagated static void* FF_EOS_NOFREEZE = (void*)(ULLONG_MAX-1); /// not automatically propagated static void* FF_EOSW = (void*)(ULLONG_MAX-2); /// propagated only by farm's stages static void* FF_GO_ON = (void*)(ULLONG_MAX-3); /// not automatically propagated static void* FF_GO_OUT = (void*)(ULLONG_MAX-4); /// not automatically propagated static void* FF_TAG_MIN = (void*)(ULLONG_MAX-10); /// just a lower bound mark // The FF_GO_OUT is quite similar to the FF_EOS_NOFREEZE. Both of them are not propagated automatically to // the next stage, but while the first one is used to exit the main computation loop and, if this is the case, to be frozen, // the second one is used to exit the computation loop and keep spinning on the input queue waiting for a new task // without being frozen. // EOSW is like EOS but it is not propagated outside a farm pattern. If an emitter receives EOSW in input, // then it will be discarded. // /* optimization levels used in the optimize_static call (see optimize.hpp) */ struct OptLevel { ssize_t max_nb_threads{MAX_NUM_THREADS}; ssize_t max_mapped_threads{MAX_NUM_THREADS}; int verbose_level{0}; bool no_initial_barrier{false}; bool no_default_mapping{false}; bool blocking_mode{false}; bool merge_with_emitter{false}; bool remove_collector{false}; bool merge_farms{false}; bool introduce_a2a{false}; }; struct OptLevel1: OptLevel { OptLevel1() { max_nb_threads=ff_numCores(); // TODO: use the mapper blocking_mode=true; no_initial_barrier=true; remove_collector=true; } }; struct OptLevel2: OptLevel { OptLevel2() { max_nb_threads=ff_numCores(); // TODO: use the mapper blocking_mode=true; no_initial_barrier=true; merge_with_emitter=true; remove_collector=true; merge_farms= true; } }; /* ----------------------------------------------------------------------- */ // This is just a counter, and is used to set the ff_node::tid value. // The _noBarrier counter is to use with threads that are not part of a topology, // such for example stand-alone nodes or manager node or ...etc... static std::atomic_ulong internal_threadCounter{0}; static std::atomic_ulong internal_threadCounter_noBarrier{MAX_NUM_THREADS}; // TODO: Should be rewritten in terms of mapping_utils.hpp #if defined(HAVE_PTHREAD_SETAFFINITY_NP) && !defined(NO_DEFAULT_MAPPING) /* * * \brief Initialize thread affinity * It initializes thread affinity i.e. which cpu the thread should be * assigned. * * \note Linux-specific code * * \param attr is the pthread attribute * \param cpuID is the identifier the core * \return -2 if error, the cpu identifier if successful */ static inline int init_thread_affinity(pthread_attr_t*attr, int cpuId) { // This is linux-specific code cpu_set_t cpuset; CPU_ZERO(&cpuset); int id; if (cpuId<0) { id = threadMapper::instance()->getCoreId(); CPU_SET (id, &cpuset); } else { id = cpuId; CPU_SET (cpuId, &cpuset); } if (pthread_attr_setaffinity_np (attr, sizeof(cpuset), &cpuset)<0) { perror("pthread_attr_setaffinity_np"); return -2; } return id; } #elif !defined(HAVE_PTHREAD_SETAFFINITY_NP) && !defined(NO_DEFAULT_MAPPING) /* * \brief Initializes thread affinity * * It initializes thread affinity i.e. it defines to which core ths thread * should be assigned. * * \return always return -1 because no thread mapping is done */ static inline int init_thread_affinity(pthread_attr_t*,int) { // Ensure that the threadMapper constructor is called threadMapper::instance(); return -1; } #else /* * \brief Initializes thread affinity * * It initializes thread affinity i.e. it defines to which core ths thread * should be assigned. * * \return always return -1 because no thread mapping is done */ static inline int init_thread_affinity(pthread_attr_t*,int) { // Do nothing return -1; } #endif /* HAVE_PTHREAD_SETAFFINITY_NP */ // forward decl /* * \brief Proxy thread routine * */ static void * proxy_thread_routine(void * arg); /*! * \class ff_thread * \ingroup buiding_blocks * * \brief thread container for (leaves) ff_node * * It defines FastFlow's threading abstraction to run ff_node in parallel * in the shared-memory runtime * * \note Should not be used directly, it is called by ff_node */ class ff_thread { friend void * proxy_thread_routine(void *arg); protected: ff_thread(BARRIER_T * barrier=NULL, bool default_mapping=true): tid((size_t)-1),threadid(0), default_mapping(default_mapping), barrier(barrier), stp(true), // only one shot by default spawned(false), freezing(0), frozen(false),isdone(false), init_error(false), attr(NULL) { (void)FF_TAG_MIN; // to avoid warnings /* Attr is NULL, default mutex attributes are used. Upon successful * initialization, the state of the mutex becomes initialized and * unlocked. * */ if (pthread_mutex_init(&mutex,NULL)!=0) { error("FATAL ERROR: ff_thread: pthread_mutex_init fails!\n"); abort(); } if (pthread_cond_init(&cond,NULL)!=0) { error("FATAL ERROR: ff_thread: pthread_cond_init fails!\n"); abort(); } if (pthread_cond_init(&cond_frozen,NULL)!=0) { error("FATAL ERROR: ff_thread: pthread_cond_init fails!\n"); abort(); } } virtual ~ff_thread() {} void thread_routine() { threadid = ff_getThreadID(); #if defined(FF_INITIAL_BARRIER) if (barrier) { barrier->doBarrier(tid); } /* else { * printf("THREAD %ld skip barrier\n", threadid); * } */ #endif void * ret; do { init_error=false; if (svc_init()<0) { error("ff_thread, svc_init failed, thread exit!!!\n"); init_error=true; break; } else { ret = svc(NULL); } svc_end(); if (disable_cancelability()) { error("ff_thread, thread_routine, could not change thread cancelability"); return; } // acquire lock. While freezing is true, // freeze and wait. pthread_mutex_lock(&mutex); if (ret != FF_EOS_NOFREEZE && !stp) { if ((freezing == 0) && (ret == FF_EOS)) stp = true; while(freezing==1) { // NOTE: freezing can change to 2 frozen=true; pthread_cond_signal(&cond_frozen); pthread_cond_wait(&cond,&mutex); } } //thawed=true; //pthread_cond_signal(&cond); //frozen=false; if (freezing != 0) freezing = 1; // freeze again next time pthread_mutex_unlock(&mutex); if (enable_cancelability()) { error("ff_thread, thread_routine, could not change thread cancelability"); return; } } while(!stp); if (freezing) { pthread_mutex_lock(&mutex); frozen=true; pthread_cond_signal(&cond_frozen); pthread_mutex_unlock(&mutex); } isdone = true; } int disable_cancelability() { if (pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old_cancelstate)) { perror("pthread_setcanceltype"); return -1; } return 0; } int enable_cancelability() { if (pthread_setcancelstate(old_cancelstate, 0)) { perror("pthread_setcanceltype"); return -1; } return 0; } #if defined(FF_TASK_CALLBACK) virtual void callbackIn(void * =NULL) { } virtual void callbackOut(void * =NULL) { } #endif public: virtual void* svc(void * task) = 0; virtual int svc_init() { return 0; }; virtual void svc_end() {} virtual void set_barrier(BARRIER_T * const b) { barrier=b;} virtual BARRIER_T* get_barrier() const { return barrier; } virtual void no_mapping() { default_mapping=false; } bool get_mapping() const { return default_mapping; } virtual int run(bool=false) { return spawn(); } virtual int spawn(int cpuId=-1) { if (spawned) return -1; if ((attr = (pthread_attr_t*)malloc(sizeof(pthread_attr_t))) == NULL) { error("spawn: pthread can not be created, malloc failed\n"); return -1; } if (pthread_attr_init(attr)) { perror("pthread_attr_init: pthread can not be created."); return -1; } int CPUId = -1; if (default_mapping) init_thread_affinity(attr, cpuId); if (CPUId==-2) return -2; if (barrier) tid= internal_threadCounter.fetch_add(1); else tid= internal_threadCounter_noBarrier.fetch_add(1); int r=0; if ((r=pthread_create(&th_handle, attr, proxy_thread_routine, this)) != 0) { errno=r; perror("pthread_create: pthread creation failed."); barrier?--internal_threadCounter:--internal_threadCounter_noBarrier; return -2; } spawned = true; return CPUId; } virtual int wait() { int r=0; stp=true; if (isfrozen()) { wait_freezing(); thaw(); } if (spawned) { pthread_join(th_handle, NULL); barrier ? --internal_threadCounter: --internal_threadCounter_noBarrier; } if (attr) { if (pthread_attr_destroy(attr)) { error("ERROR: ff_thread.wait: pthread_attr_destroy fails!"); r=-1; } free(attr); attr = NULL; } spawned=false; return r; } virtual int wait_freezing() { pthread_mutex_lock(&mutex); while(!frozen) pthread_cond_wait(&cond_frozen,&mutex); pthread_mutex_unlock(&mutex); return (init_error?-1:0); } virtual void stop() { stp = true; }; virtual void freeze() { stp=false; freezing = 1; } virtual void thaw(bool _freeze=false, ssize_t=-1) { pthread_mutex_lock(&mutex); // if this function is called even if the thread is not // in frozen state, then freezing has to be set to 1 and not 2 //if (_freeze) freezing= (frozen?2:1); // next time freeze again the thread // October 2014, changed the above policy. // If thaw is called and the thread is not in the frozen stage, // then the thread won't fall to sleep at the next freezing point if (_freeze) freezing = 2; // next time freeze again the thread else freezing=0; //assert(thawed==false); frozen=false; pthread_cond_signal(&cond); pthread_mutex_unlock(&mutex); //pthread_mutex_lock(&mutex); //while(!thawed) pthread_cond_wait(&cond, &mutex); //thawed=false; //pthread_mutex_unlock(&mutex); } virtual bool isfrozen() const { return freezing>0;} virtual bool done() const { return isdone || (frozen && !stp);} pthread_t get_handle() const { return th_handle;} inline size_t getTid() const { return tid; } inline size_t getOSThreadId() const { return threadid; } protected: size_t tid; /// unique logical id of the thread size_t threadid; /// OS specific thread ID bool default_mapping; private: BARRIER_T * barrier; /// A \p Barrier object bool stp; bool spawned; int freezing; bool frozen,isdone; bool init_error; pthread_t th_handle; pthread_attr_t *attr; pthread_mutex_t mutex; pthread_cond_t cond; pthread_cond_t cond_frozen; int old_cancelstate; }; static void * proxy_thread_routine(void * arg) { ff_thread & obj = *(ff_thread *)arg; obj.thread_routine(); pthread_exit(NULL); return NULL; } // forward declaration class ff_loadbalancer; class ff_gatherer; /*! * \class ff_node * \ingroup building_blocks * * \brief The FastFlow abstract contanier for a parallel activity (actor). * * Implements \p ff_node, i.e. the general container for a parallel * activity. From the orchestration viewpoint, the process model to * be employed is a CSP/Actor hybrid model where activities (\p * ff_nodes) are named and the data paths between processes are * clearly identified. \p ff_nodes synchronise each another via * abstract units of SPSC communications and synchronisation (namely * 1:1 channels), which models data dependency between two * \p ff_nodes. It is used to encapsulate * sequential portions of code implementing functions. * * \p In a multicore, a ff_node is implemented as non-blocking thread. * It is not and should * not be confused with a task. Typically a \p ff_node uses the 100% of one CPU * context (i.e. one core, either physical or HT, if any). Overall, the number of * ff_nodes running should not exceed the number of logical cores of the platform. * * \p A ff_node behaves as a loop that gets an input (i.e. the parameter of \p svc * method) and produces one or more outputs (i.e. return parameter of \p svc method * or parameter of the \p ff_send_out method that can be called in the \p svc method). * The loop complete on the output of the special value "end-of_stream" (EOS). * The EOS is propagated across channels to the next \p ff_node. * * Key methods are: \p svc_init, \p svc_end (optional), and \p svc (pure virtual, * mandatory). The \p svc_init method is called once at node initialization, * while the \p svn_end method is called after a EOS task has been returned. * * This class is defined in \ref node.hpp */ class ff_node { private: friend class ff_farm; friend class ff_pipeline; friend class ff_map; template friend class ff_nodeSelector; friend class ff_loadbalancer; friend class ff_gatherer; friend class ff_minode; friend class ff_monode; friend class ff_a2a; friend class ff_comb; friend struct internal_mo_transformer; friend struct internal_mi_transformer; #ifdef DFF_ENABLED friend class dGroups; friend class dGroup; #endif private: FFBUFFER * in; ///< Input buffer, built upon SWSR lock-free (wait-free) ///< (un)bounded FIFO queue FFBUFFER * out; ///< Output buffer, built upon SWSR lock-free (wait-free) ///< (un)bounded FIFO queue ssize_t myid; ///< This is the node id, it is valid only for farm's workers ssize_t CPUId; ssize_t neos=1; ///< n. of EOS the node expects to receive before terminating bool myoutbuffer; bool myinbuffer; bool skip1pop; #ifdef DFF_ENABLED bool _skipallpop; #endif bool in_active; // allows to disable/enable input tasks receiving bool my_own_thread; ff_thread * thread; /// A \p thWorker object, which extends the \p ff_thread class bool (*callback)(void *, int, unsigned long,unsigned long, void *); void * callback_arg; BARRIER_T * barrier; /// A \p Barrier object struct timeval tstart; struct timeval tstop; struct timeval wtstart; struct timeval wtstop; double wttime; protected: virtual void set_id(ssize_t id) { myid = id; } // sets how many EOSs the node has to receive before terminating, // it also sets when eosnotify has to be called, by default at each input EOS virtual void set_neos(ssize_t n) { neos = n; } virtual inline bool push(void * ptr) { return out->push(ptr); } virtual inline bool pop(void ** ptr) { if (!in_active) return false; // it does not want to receive data return in->pop(ptr); } virtual inline bool Push(void *ptr, unsigned long retry=((unsigned long)-1), unsigned long ticks=(TICKS2WAIT)) { if (blocking_out) { retry: bool empty=out->empty(); bool r = push(ptr); if (r) { // OK if (empty) pthread_cond_signal(p_cons_c); } else { // FULL struct timespec tv; timedwait_timeout(tv); pthread_mutex_lock(prod_m); pthread_cond_timedwait(prod_c,prod_m,&tv); pthread_mutex_unlock(prod_m); goto retry; } return true; } for(unsigned long i=0;ipop(ptr); if (!r) { // EMPTY struct timespec tv; timedwait_timeout(tv); pthread_mutex_lock(cons_m); pthread_cond_timedwait(cons_c, cons_m,&tv); pthread_mutex_unlock(cons_m); goto retry; } return true; } for(unsigned long i=0;iinit()) return -1; return 0; } virtual int create_input_buffer_mp(int nentries, bool fixedsize=FF_FIXED_SIZE, int neos=1) { if (create_input_buffer(nentries,fixedsize)<0) return -1; // setting multi-producer push in->pushPMF = &FFBUFFER::mp_push; set_neos(neos); return 0; } /** * \brief Creates the output channel * * \param nentries: the number of elements of the buffer * \param fixedsize flag to decide whether the buffer is bound or unbound. * Default is \p true. * * \return 0 if successful, -1 otherwise */ virtual int create_output_buffer(int nentries, bool fixedsize=FF_FIXED_SIZE) { if (out) return -1; if (nentries<=0) return -1; out = new FFBUFFER(nentries,fixedsize); if (!out) return -1; myoutbuffer=true; if (!out->init()) return -1; return 0; } /** * \brief Assign the output channelname to a channel * * Attach the output of a \p ff_node to an existing channel, typically the input * channel of another \p ff_node * * \param o reference to a channel of type \p FFBUFFER * * \return 0 if successful, -1 otherwise */ virtual int set_output_buffer(FFBUFFER * const o) { if (myoutbuffer) return -1; out = o; return 0; } /** * \brief Assign the input channelname to a channel * * Attach the input of a \p ff_node to an existing channel, typically the output * channel of another \p ff_node * * \param i a buffer object of type \p FFBUFFER * * \return 0 if successful, -1 otherwise */ virtual int set_input_buffer(FFBUFFER * const i) { if (myinbuffer) return -1; in = i; return 0; } virtual inline int set_input(const svector &) { return -1;} virtual inline int set_input(ff_node *n) { return set_input_buffer(n->get_in_buffer()); } virtual inline int set_input_feedback(ff_node *) { return -1;} virtual inline int set_output(const svector &) { return -1;} virtual inline int set_output(ff_node *n) { return set_output_buffer(n->get_in_buffer()); } virtual inline int set_output_feedback(ff_node *) { return -1;} virtual inline void set_input_channelid(ssize_t, bool=true) {} virtual int prepare() { prepared=true; return 0; } virtual int dryrun() { if (!prepared) return prepare(); return 0; } virtual void set_scheduling_ondemand(const int /*inbufferentries*/=1) {} virtual int ondemand_buffer() const { return 0;} /** * \brief Run the ff_node * * \return 0 success, -1 otherwise */ virtual int run(bool=false) { if (thread) delete reinterpret_cast(thread); thread = new thWorker(this,neos); if (!thread) return -1; return thread->run(); } #ifdef DFF_ENABLED virtual int run(ff_node*, bool=false) {return 0;} #endif /** * \brief Suspend (freeze) the ff_node and run it * * Only initialisation will be performed * * \return 0 success, -1 otherwise */ virtual int freeze_and_run(bool=false) { if (thread) delete reinterpret_cast(thread); thread = new thWorker(this,neos); if (!thread) return 0; freeze(); return thread->run(); } /** * \brief Wait ff_node termination * * \return 0 success, -1 otherwise */ virtual int wait() { if (!thread) return 0; return thread->wait(); } /** * \brief Wait the freezing state * * It will happen on EOS arrival on the input channel * * \return 0 success, -1 otherwise */ virtual int wait_freezing() { if (!thread) return 0; return thread->wait_freezing(); } virtual void stop() { if (!thread) return; thread->stop(); } /** * \brief Freeze (suspend) a ff_node */ virtual void freeze() { if (!thread) return; thread->freeze(); } /** * \brief Thaw (resume) a ff_node */ virtual void thaw(bool _freeze=false, ssize_t=-1) { if (!thread) return; thread->thaw(_freeze); } /** * \brief Checks if a ff_node is frozen * \return \p true is it frozen */ virtual bool isfrozen() const { if (!thread) return false; return thread->isfrozen(); } /** * \brief checks if the node is running * */ virtual bool done() const { if (!thread) return true; return thread->done(); } virtual bool isoutbuffermine() const { return myoutbuffer;} virtual int cardinality(BARRIER_T * const b) { barrier = b; return 1; } virtual int cardinality() const { return 1; } virtual inline void setlb(ff_loadbalancer*,bool=false) {} virtual inline void setgt(ff_gatherer*,bool=false) {} /** * \brief Misure \ref ff::ff_node execution time * * \return time (ms) */ virtual double ffTime() { return diffmsec(tstop,tstart); } /** * \brief Misure \ref ff_node::svc execution time * * \return time (ms) */ virtual double wffTime() { return diffmsec(wtstop,wtstart); } public: /* * \brief Default retry delay in nonblocking get/put on channels */ enum {TICKS2WAIT=1000}; void *const GO_ON = FF_GO_ON; void *const GO_OUT = FF_GO_OUT; void *const EOS_NOFREEZE = FF_EOS_NOFREEZE; void *const EOS = FF_EOS; void *const EOSW = FF_EOSW; ff_node(const ff_node&):ff_node() {} /** * \brief Destructor, polymorphic deletion through base pointer is allowed. * * */ virtual ~ff_node() { if (in && myinbuffer) delete in; if (out && myoutbuffer) delete out; if (thread && my_own_thread) delete reinterpret_cast(thread); if (cons_c && cons_m) { pthread_cond_destroy(cons_c); free(cons_c); cons_c = nullptr; } if (cons_m) { pthread_mutex_destroy(cons_m); free(cons_m); cons_m = nullptr; } if (prod_m) { pthread_mutex_destroy(prod_m); free(prod_m); prod_m = nullptr; } if (prod_c) { pthread_cond_destroy(prod_c); free(prod_c); prod_c = nullptr; } }; /** * \brief The service callback (should be filled by user with parallel activity business code) * * \param task is a the input data stream item pointer (task) * \return output data stream item pointer */ virtual void* svc(void * task) = 0; /** * \brief Service initialisation * * Called after run-time initialisation (e.g. thread spawning) but before * to start to get items from input stream (can be useful for initialisation * of parallel activities, e.g. manual thread pinning that cannot be done in * the costructor because threads stil do not exist). * * \return 0 */ virtual int svc_init() { return 0; } /** * * \brief Service finalisation * * Called after EOS arrived (logical termination) but before shutdding down * runtime support (can be useful for housekeeping) */ virtual void svc_end() {} /** * \brief Node initialisation * * This is a different initialization method with respect to svc_init (the default method). * This can be used to explicitly initialize the object when the node is not running as a thread. * * \return 0 */ virtual int nodeInit() { return 0; } /** * \brief Node finalisation. * * This is a different finalisation method with respect to svc_end (the default method). * This can be used to explicitly finalise the object when the node is not running as a thread. */ virtual void nodeEnd() { } /** * \brief EOS callback * * This method is called when an EOS has just been received from one input channel. * Inside this method it is possible to call ff_send_out to produce data elements in output * (this is not possible in the svc_end method). * The parameter \param id is the ID of the channel that received the EOS. */ virtual void eosnotify(ssize_t /*id*/=-1) {} /** * \brief Returns the number of EOS the node has to receive before terminating. */ virtual ssize_t get_neos() const { return neos;} /** * \brief Returns the identifier of the node (not unique) */ virtual ssize_t get_my_id() const { return myid; }; /** * \brief Returns the OS specific thread id of the node. * * The returned id is valid (>0) only if the node is an active node (i.e. the thread has been created). * */ inline size_t getOSThreadId() const { if (thread) return thread->getOSThreadId(); return 0; } virtual bool change_node(ff_node* old, ff_node* n, bool cleanup=false, bool remove_from_cleanuplist=false) { return false;} /** * Change the size of the outputchannel. * WARNING: this method should not be used if the queue is being used!!!! * */ virtual bool change_outputqueuesize(size_t newsz, size_t &oldsz) { if (!out) { oldsz=0; return false; } oldsz = out->changesize(newsz); return true; } /** * Change the size of the inputchannel. * WARNING: this method should not be used if the queue is being used!!!! * */ virtual bool change_inputqueuesize(size_t newsz, size_t &oldsz) { if (!in) { oldsz=0; return false; } oldsz = in->changesize(newsz); return true; } #if defined(FF_TASK_CALLBACK) virtual void callbackIn(void * =NULL) { } virtual void callbackOut(void * =NULL) { } #endif virtual inline void get_out_nodes(svector&w) { w.push_back(this); } virtual inline void get_out_nodes_feedback(svector&) {} virtual inline void get_in_nodes(svector&w) { w.push_back(this); } virtual inline void get_in_nodes_feedback(svector&) {} /** * \brief Force ff_node-to-core pinning * * \param cpuID is the ID of the CPU to which the thread will be pinned. */ virtual void setAffinity(int cpuID) { if (cpuID<0 || !threadMapper::instance()->checkCPUId(cpuID) ) { error("setAffinity, invalid cpuID\n"); } CPUId=cpuID; } virtual void set_barrier(BARRIER_T * const b) { barrier = b; } virtual BARRIER_T* get_barrier() const { return barrier; } /** * \internal * \brief Gets the CPU id (if set) of this node is pinned * * It gets the ID of the CPU where the ff_node is running. * * \return The identifier of the CPU. */ virtual int getCPUId() const { return CPUId; } /** * \brief Nonblocking put into the input channel * * Wait-free and fence-free (under TSO) * This is called by a different node (e.g., lb) to push data * into the node's input queue. * * \param ptr is a pointer to the task * */ virtual inline bool put(void * ptr) { //return in->push(ptr); return (in->*in->pushPMF)(ptr); } /** * \brief Noblocking pop from the output channel * * Wait-free and fence-free (under TSO) * * \param ptr is a pointer to the task * */ virtual inline bool get(void **ptr) { return out->pop(ptr);} virtual inline void losetime_out(unsigned long ticks=ff_node::TICKS2WAIT) { FFTRACE(lostpushticks+=ticks; ++pushwait); #if defined(SPIN_USE_PAUSE) const long n = (long)ticks/2000; for(int i=0;i<=n;++i) PAUSE(); #else ticks_wait(ticks); #endif /* SPIN_USE_PAUSE */ } virtual inline void losetime_in(unsigned long ticks=ff_node::TICKS2WAIT) { FFTRACE(lostpopticks+=ticks; ++popwait); #if defined(SPIN_USE_PAUSE) const long n = (long)ticks/2000; for(int i=0;i<=n;++i) PAUSE(); #else ticks_wait(ticks); #endif /* SPIN_USE_PAUSE */ } /** * \brief Gets input channel * * It returns a pointer to the input buffer. * * \return A pointer to the input buffer */ virtual FFBUFFER * get_in_buffer() const { return in;} /** * \brief Gets pointer to the output channel * * It returns a pointer to the output buffer. * * \return A pointer to the output buffer. */ virtual FFBUFFER * get_out_buffer() const { return out;} virtual const struct timeval getstarttime() const { return tstart;} virtual const struct timeval getstoptime() const { return tstop;} virtual const struct timeval getwstartime() const { return wtstart;} virtual const struct timeval getwstoptime() const { return wtstop;} #if defined(TRACE_FASTFLOW) virtual void ffStats(std::ostream & out) { out << "ID: " << get_my_id() << " work-time (ms): " << wttime << "\n" << " n. tasks : " << taskcnt << "\n" << " svc ticks : " << tickstot << " (min= " << ticksmin << " max= " << ticksmax << ")\n" << " n. push lost : " << pushwait << " (ticks=" << lostpushticks << ")" << "\n" << " n. pop lost : " << popwait << " (ticks=" << lostpopticks << ")" << "\n"; } virtual double getworktime() const { return wttime; } virtual size_t getnumtask() const { return taskcnt; } virtual ticks getsvcticks() const { return tickstot; } virtual size_t getpushlost() const { return pushwait;} virtual size_t getpoplost() const { return popwait; } #endif /** * \brief Sends out the task * * It allows to emit tasks on output stream without returning from the \p svc method. * Make the ff_node to emit zero or more tasks per input task * * \param task a pointer to the task * \param retry number of tries to put (nonbloking partial) the task to output channel * \param ticks delay between successive retries * */ virtual bool ff_send_out(void * task, int id=-1, unsigned long retry=((unsigned long)-1), unsigned long ticks=(TICKS2WAIT)) { if (callback) return callback(task,id,retry,ticks,callback_arg); bool r =Push(task,retry,ticks); #if defined(FF_TASK_CALLBACK) if (r) callbackOut(); #endif return r; } // Warning resetting queues while the node is running may produce unexpected results. virtual void reset() { if (in) in->reset(); if (out) out->reset(); } /** * checking for multi-input/output, all-to-all, farm, pipe * */ virtual inline bool isMultiInput() const { return false; } virtual inline bool isMultiOutput() const { return false; } virtual inline bool isAll2All() const { return false; } virtual inline bool isFarm() const { return false; } virtual inline bool isOFarm() const { return false; } virtual inline bool isComp() const { return false; } virtual inline bool isPipe() const { return false; } virtual inline void set_multiinput() {} #if defined(FF_REPARA) struct rpr_measure_t { size_t schedule_id; size_t time_before, time_after; size_t problemSize; // computed if the rpr::task_size attribute is defined otherwise is 0 size_t bytesIn, bytesOut; size_t vmSize, vmPeak; double energy; }; using RPR_devices_measure = std::vector > >; using RPR_measures_vector = std::vector >; /** * Returns input data size */ virtual size_t rpr_get_sizeIn() const { return rpr_sizeIn; } /** * Returns output data size */ virtual size_t rpr_get_sizeOut() const { return rpr_sizeOut; } /** * gets/sets energy flag */ virtual bool rpr_get_measure_energy() const { return measureEnergy; } virtual void rpr_set_measure_energy(bool v) { measureEnergy = v; } /** * Returns all measures collected by the node. * The structure is: * - the outermost vector is greater than 1 if the node is a pipeline or a farm * - each stage of a pipeline or a worker of a farm can be a pipeline or a farm as well * therefore the second level vector is grater than 1 only if the stage is a pipeline or a farm * - each entry of a stage is a vector containing info for each device associated to the stage. * The device is identified by the first entry of the std::pair, the second element of the pair * is a vector containing the measurments for the period considered. */ virtual RPR_measures_vector rpr_get_measures() { return RPR_measures_vector(); } protected: bool measureEnergy = false; size_t rpr_sizeIn = {0}; size_t rpr_sizeOut = {0}; #endif /* FF_REPARA */ /** * used for composition (see ff_comb) */ static inline bool ff_send_out_comp(void * task, int, unsigned long /*retry*/,unsigned long /*ticks*/, void *obj) { return ((ff_node *)obj)->push_comp_local(task); } virtual bool push_comp_local(void *task) { (void)task; abort(); // to be removed, just for debugging purposes } virtual inline ssize_t get_channel_id() const { return -1; } /** returns the total number of output channels */ virtual inline size_t get_num_outchannels() const { return 0; } /** returns the total number of input channels */ virtual inline size_t get_num_inchannels() const { return 0; } //(in?1:0); } virtual inline size_t get_num_feedbackchannels() const { return 0; } //(out?1:0);} virtual void propagateEOS(void* task=FF_EOS) { (void)task; } #ifdef DFF_ENABLED std::function serializeF; std::function freetaskF; std::function deserializeF; std::function alloctaskF; virtual bool isSerializable(){ return (bool)serializeF; } virtual bool isDeserializable(){ return (bool)deserializeF; } virtual std::pair getSerializationFunction(){return std::make_pair(serializeF,freetaskF);} virtual std::pair getDeserializationFunction(){ return std::make_pair(deserializeF,alloctaskF);} #endif // always defined, the body will implement a no-op if the distributed runtime is disabled GroupInterface createGroup(std::string); protected: ff_node():in(0),out(0),myid(-1),CPUId(-1), myoutbuffer(false),myinbuffer(false), skip1pop(false), in_active(true), my_own_thread(true), thread(NULL),callback(NULL),barrier(NULL) { time_setzero(tstart);time_setzero(tstop); time_setzero(wtstart);time_setzero(wtstop); wttime=0; FFTRACE(taskcnt=0;lostpushticks=0;pushwait=0;lostpopticks=0;popwait=0;ticksmin=(ticks)-1;ticksmax=0;tickstot=0); p_cons_c = NULL; blocking_in = blocking_out = FF_RUNTIME_MODE; }; // move constructor ff_node(ff_node &&n) { tstart = n.tstart; tstop = n.tstop; wtstart = n.wtstart; wtstop = n.wtstop; wttime = n.wttime; p_cons_c = n.p_cons_c; blocking_in = n.blocking_in; blocking_out = n.blocking_out; default_mapping = n.default_mapping; in_active = n.in_active; cons_m = n.cons_m; cons_c = n.cons_c; prod_m = n.prod_m; prod_c = n.prod_c; barrier = n.barrier; // TODO trace <------ in = n.in; myinbuffer = n.myinbuffer; out = n.out; myoutbuffer = n.myoutbuffer; thread = n.thread; my_own_thread = n.my_own_thread; n.in = nullptr; n.myinbuffer = false; n.out = nullptr; n.myoutbuffer = false; n.thread = nullptr; n.my_own_thread = false; n.barrier = nullptr; n.cons_m = nullptr; n.cons_c = nullptr; n.prod_m = nullptr; n.prod_c = nullptr; } virtual inline void input_active(const bool onoff) { if (in_active != onoff) in_active= onoff; } virtual void registerCallback(bool (*cb)(void *,int,unsigned long,unsigned long,void *), void * arg) { callback=cb; callback_arg=arg; } virtual void registerAllGatherCallback(int (* /*cb*/)(void *,void **, void*), void * /*arg*/) {} /* WARNING: these method must be called before the run() method */ virtual void blocking_mode(bool blk=true) { blocking_in = blocking_out = blk; } virtual void no_barrier() { initial_barrier=false; } virtual void no_mapping() { default_mapping=false; } private: /* ------------------------------------------------------------------------------------- */ class thWorker: public ff_thread { public: thWorker(ff_node * const filter, const ssize_t input_neos=1): ff_thread(filter->barrier, filter->default_mapping),filter(filter),input_neos(input_neos) {} inline bool push(void * task) { /* NOTE: filter->push and not buffer->push because of the filter can be a dnode * * It is not correct to call filter->Push because the filter could be a composition * so the ff_send_out allows to call the callback */ //return filter->Push(task); return filter->ff_send_out(task); } inline bool pop(void ** task) { /* * NOTE: filter->pop and not buffer->pop because of the filter can be a dnode */ return filter->Pop(task); } inline bool put(void * ptr) { return filter->put(ptr);} inline bool get(void **ptr) { return filter->get(ptr);} inline void* svc(void * ) { void * task = NULL; void * ret = FF_EOS; bool inpresent = (filter->get_in_buffer() != NULL); bool outpresent = (filter->get_out_buffer() != NULL); bool skipfirstpop = filter->skipfirstpop(); bool exit=false; bool filter_outpresent = false; size_t neos=input_neos; // if the node is a combine where the last stage is a multi-output if ( filter && ( !outpresent && filter->isMultiOutput() ) ) { filter_outpresent=true; } gettimeofday(&filter->wtstart,NULL); do { #ifdef DFF_ENABLED if (!filter->skipallpop() && inpresent){ #else if (inpresent) { #endif if (!skipfirstpop) pop(&task); else skipfirstpop=false; if ((task == FF_EOS) || (task == FF_EOSW) || (task == FF_EOS_NOFREEZE)) { ret = task; if (--neos > 0) continue; filter->eosnotify(); // only EOS and EOSW are propagated if ( (task == FF_EOS) || (task == FF_EOSW) ) { if (outpresent) push(task); if (filter_outpresent) filter->propagateEOS(); } break; } if (task == FF_GO_OUT) break; } FFTRACE(++filter->taskcnt); FFTRACE(ticks t0 = getticks()); #if defined(FF_TASK_CALLBACK) if (filter) callbackIn(); #endif ret = filter->svc(task); #if defined(TRACE_FASTFLOW) ticks diff=(getticks()-t0); filter->tickstot +=diff; filter->ticksmin=(std::min)(filter->ticksmin,diff); // (std::min) for win portability) filter->ticksmax=(std::max)(filter->ticksmax,diff); #endif if (ret == FF_GO_OUT) break; if (!ret || (ret >= FF_EOSW)) { // EOS or EOS_NOFREEZE or EOSW // NOTE: The EOS is gonna be produced in the output queue // and the thread exits even if there might be some tasks // in the input queue !!! if (!ret) ret = FF_EOS; exit=true; } if ( outpresent && ((ret != FF_GO_ON) && (ret != FF_EOS_NOFREEZE)) ) { push(ret); #if defined(FF_TASK_CALLBACK) if (filter) callbackOut(); #endif } } while(!exit); gettimeofday(&filter->wtstop,NULL); filter->wttime+=diffmsec(filter->wtstop,filter->wtstart); return ret; } int svc_init() { #if !defined(HAVE_PTHREAD_SETAFFINITY_NP) && !defined(NO_DEFAULT_MAPPING) if (filter->default_mapping) { int cpuId = filter->getCPUId(); if (ff_mapThreadToCpu((cpuId<0) ? (cpuId=threadMapper::instance()->getCoreId(tid)) : cpuId)!=0) error("Cannot map thread %d to CPU %d, mask is %u, size is %u, going on...\n",tid, (cpuId<0) ? threadMapper::instance()->getCoreId(tid) : cpuId, threadMapper::instance()->getMask(), threadMapper::instance()->getCListSize()); filter->setCPUId(cpuId); } #endif gettimeofday(&filter->tstart,NULL); return filter->svc_init(); } void svc_end() { filter->svc_end(); gettimeofday(&filter->tstop,NULL); } int run(bool=false) { int CPUId = ff_thread::spawn(filter->getCPUId()); filter->setCPUId(CPUId); return (CPUId==-2)?-1:0; } inline int wait() { return ff_thread::wait();} inline int wait_freezing() { return ff_thread::wait_freezing();} inline void freeze() { ff_thread::freeze();} inline bool isfrozen() const { return ff_thread::isfrozen();} inline bool done() const { return ff_thread::done();} inline int get_my_id() const { return filter->get_my_id(); }; protected: #if defined(FF_TASK_CALLBACK) void callbackIn(void *t=NULL) { filter->callbackIn(t); } void callbackOut(void *t=NULL) { filter->callbackOut(t); } #endif protected: ff_node * const filter; const ssize_t input_neos; }; /* ------------------------------------------------------------------------------------- */ inline void setCPUId(int id) { CPUId = id;} inline void setThread(ff_thread *const th) { my_own_thread = false; thread = th; } inline size_t getTid() const { if (!thread) return (size_t)-1; return thread->getTid(); } protected: #if defined(TRACE_FASTFLOW) size_t taskcnt; ticks lostpushticks; size_t pushwait; ticks lostpopticks; size_t popwait; ticks ticksmin; ticks ticksmax; ticks tickstot; #endif // for the input queue pthread_mutex_t *cons_m = nullptr; pthread_cond_t *cons_c = nullptr; // for the output queue pthread_mutex_t *prod_m = nullptr; pthread_cond_t *prod_c = nullptr; // for synchronizing with the next multi-input stage pthread_cond_t *p_cons_c = nullptr; bool FF_MEM_ALIGN(blocking_in,32); bool FF_MEM_ALIGN(blocking_out,32); bool prepared = false; bool initial_barrier = true; bool default_mapping = true; }; // ff_node /* *************************** Typed node ************************* */ //#ifndef WIN32 //VS12 /*! * \class ff_node_base_t * \ingroup building_blocks * * \brief The FastFlow typed abstract contanier for a parallel activity (actor). * * Key method is: \p svc (pure virtual). * * This class is defined in \ref node.hpp */ template struct ff_node_t: ff_node { typedef IN_t in_type; typedef OUT_t out_type; using ff_node::registerCallback; using ff_node::ff_send_out; ff_node_t(): GO_ON((OUT_t*)FF_GO_ON), EOS((OUT_t*)FF_EOS), EOSW((OUT_t*)FF_EOSW), GO_OUT((OUT_t*)FF_GO_OUT), EOS_NOFREEZE((OUT_t*) FF_EOS_NOFREEZE) { #ifdef DFF_ENABLED /* WARNING: * the definition of functions alloctaskF, freetaskF, serializeF, deserializeF * IS DUPLICATED for the ff_minode_t and ff_monode_t (see file multinode.hpp). * */ if constexpr (traits::has_alloctask_v) { this->alloctaskF = [](char* ptr, size_t sz) -> void* { IN_t* p = nullptr; alloctaskWrapper(ptr, sz, p); assert(p); return p; }; } else { this->alloctaskF = [](char*, size_t ) -> void* { IN_t* o = new IN_t; assert(o); return o; }; } if constexpr (traits::has_freetask_v) { this->freetaskF = [](void* o) { freetaskWrapper(reinterpret_cast(o)); }; } else { this->freetaskF = [](void* o) { if constexpr (!std::is_void_v) { OUT_t* obj = reinterpret_cast(o); delete obj; } }; } // check on Serialization capabilities on the OUTPUT type! if constexpr (traits::is_serializable_v){ this->serializeF = [](void* o, dataBuffer& b) -> bool { bool datacopied = true; std::pair p = serializeWrapper(reinterpret_cast(o,datacopied)); b.setBuffer(p.first, p.second); return datacopied; }; } else if constexpr (cereal::traits::is_output_serializable::value){ this->serializeF = [](void* o, dataBuffer& b) -> bool { std::ostream oss(&b); cereal::PortableBinaryOutputArchive ar(oss); ar << *reinterpret_cast(o); return true; }; } // check on Serialization capabilities on the INPUT type! if constexpr (traits::is_deserializable_v) { this->deserializeF = [this](dataBuffer& b, bool& datacopied) -> void* { IN_t* ptr=(IN_t*)this->alloctaskF(b.getPtr(), b.getLen()); datacopied = deserializeWrapper(b.getPtr(), b.getLen(), ptr); assert(ptr); return ptr; }; } else if constexpr(cereal::traits::is_input_serializable::value){ this->deserializeF = [this](dataBuffer& b, bool& datacopied) -> void* { std::istream iss(&b); cereal::PortableBinaryInputArchive ar(iss); IN_t* o = (IN_t*)this->alloctaskF(nullptr,0); assert(o); ar >> *o; datacopied = true; return o; }; } #endif } OUT_t * const GO_ON, *const EOS, *const EOSW, *const GO_OUT, *const EOS_NOFREEZE; virtual ~ff_node_t() {} virtual OUT_t* svc(IN_t*)=0; inline void *svc(void *task) { return svc(reinterpret_cast(task)); }; private: // deleting some functions that do not have to be used in the svc using ff_node::push; using ff_node::pop; using ff_node::Push; using ff_node::Pop; }; #if (__cplusplus >= 201103L) || (defined __GXX_EXPERIMENTAL_CXX0X__) || (defined(HAS_CXX11_VARIADIC_TEMPLATES)) /*! * \class ff_node_F * \ingroup building_blocks * * \brief The FastFlow typed abstract contanier for a parallel activity (actor). * * Creates an ff_node_t from a lambdas, function pointer, etc * * This class is defined in \ref node.hpp */ template > struct ff_node_F: public ff_node_t { ff_node_F(FUNC f):F(f) {} TOUT* svc(TIN* task) { return F(task, this); } FUNC F; }; #endif //#endif /* ------------------------ internal node implementations, should not be used -------- */ /* just a node interface for the input and output buffers * This is used in the internal implementation but can be used also * at the user level. In this second case */ struct ff_buffernode: ff_node { ff_buffernode() {} ff_buffernode(int nentries, bool fixedsize=FF_FIXED_SIZE, int id=-1, int multi_producer_eos=-1) { set(nentries,fixedsize,id, multi_producer_eos); } // NOTE: this constructor is supposed to be used only for implementing // internal FastFlow features! ff_buffernode(int id, FFBUFFER *in, FFBUFFER *out) { set_id(id); set_input_buffer(in); set_output_buffer(out); } void set(int nentries, bool fixedsize=FF_FIXED_SIZE, int id=-1, int multi_producer_eos=-1) { set_id(id); if (multi_producer_eos<0) { if (create_input_buffer(nentries,fixedsize) < 0) { error("FATAL ERROR: ff_buffernode::set: create_input_buffer fails!\n"); abort(); } set_output_buffer(ff_node::get_in_buffer()); } else { if (create_input_buffer_mp(nentries,fixedsize, multi_producer_eos) < 0) { error("FATAL ERROR: ff_buffernode::set: create_input_buffer_mp fails!\n"); abort(); } set_output_buffer(ff_node::get_in_buffer()); } } int init_blocking_stuff() { // blocking stuff pthread_mutex_t *m = NULL; pthread_cond_t *c = NULL; if (!ff_node::init_output_blocking(m,c)) { error("buffernode, FATAL ERROR, init input blocking mode for accelerator\n"); return -1; } if (!ff_node::init_input_blocking(m,c)) { error("buffernode, FATAL ERROR, init input blocking mode for accelerator\n"); return -1; } return 0; } void reset_blocking_out() { blocking_out = false; } bool ff_send_out(void *ptr, int id=-1, unsigned long retry=((unsigned long)-1), unsigned long ticks=(ff_node::TICKS2WAIT)) { return ff_node::ff_send_out(ptr,id,retry,ticks); } bool gather_task(void **task, unsigned long retry=((unsigned long)-1), unsigned long ticks=(ff_node::TICKS2WAIT)) { bool r =ff_node::Pop(task,retry,ticks); return r; } template bool gather_task(T *&task, unsigned long retry=((unsigned long)-1), unsigned long ticks=(ff_node::TICKS2WAIT)) { return gather_task((void **)&task, retry, ticks); } protected: void* svc(void*){return NULL;} pthread_cond_t &get_cons_c() { return *p_cons_c;} // New Blocking protocol (both for bounded and unbounded buffer): // init_output_blocking initializes prod_* // set_output_blocking sets p_cons_* // init_input_blocking initializes cons_* // sender: // empty=channel.empty(); // r=channel.send() // if (!r) timewait(prod_c); // if empty then signal(p_cons_c) // the channel was empty // receive: // r=channel.receive() // if (!r) timewait(cons_c); // channel empty }; } // namespace ff #endif /* FF_NODE_HPP */