/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /*! * \link * \file all2all.hpp * \ingroup building_blocks * * \brief FastFlow all-2-all building block * * @details FastFlow basic container for a shared-memory parallel activity * */ #ifndef FF_A2A_HPP #define FF_A2A_HPP /* *************************************************************************** * * FastFlow is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License version 3 as * published by the Free Software Foundation. * Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3 * or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT) * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* **************************************************************************** */ #include #include namespace ff { // forward declarations static ff_node* ispipe_getlast(ff_node*); class ff_a2a: public ff_node { friend class ff_farm; friend class ff_pipeline; protected: inline int cardinality(BARRIER_T * const barrier) { int card=0; for(size_t i=0;icardinality(barrier); for(size_t i=0;icardinality(barrier); return card; } inline int prepare() { /* ----------------------- */ if (wraparound) { if (workers2[0]->isMultiOutput()) { // NOTE: we suppose that all others are the same if (workers1[0]->isMultiInput()) { // NOTE: we suppose that all others are the same for(size_t i=0;iset_id(i); internalSupportNodes.push_back(t); workers2[i]->set_output_feedback(t); workers1[j]->set_input_feedback(t); } } } else { // the cardinatlity of the first and second set of workers must be the same if (workers1.size() != workers2.size()) { error("A2A, wrap_around, the workers of the second set are not multi-output nodes so the cardinatlity of the first and second set must be the same\n"); return -1; } if (create_input_buffer(in_buffer_entries, false) <0) { error("A2A, error creating input buffers\n"); return -1; } for(size_t i=0;iset_output_feedback(workers1[i]); } } else { // the cardinatlity of the first and second set of workers must be the same if (workers1.size() != workers2.size()) { error("A2A, wrap_around, the workers of the second set are not multi-output nodes so the cardinatlity of the first and second set must be the same\n"); return -1; } if (!workers1[0]->isMultiInput()) { // we suppose that all others are the same if (create_input_buffer(in_buffer_entries, false) <0) { error("A2A, error creating input buffers\n"); return -1; } for(size_t i=0;iset_output_buffer(workers1[i]->get_in_buffer()); } else { if (create_output_buffer(out_buffer_entries, false) <0) { error("A2A, error creating output buffers\n"); return -1; } for(size_t i=0;iset_input_feedback(workers2[i])<0) { 
error("A2A, wrap_around, the nodes of the first set are not multi-input\n"); return -1; } } } // blocking stuff -------------------------------------------- pthread_mutex_t *m = NULL; pthread_cond_t *c = NULL; for(size_t i=0;iinit_output_blocking(m,c)) return -1; } if (!workers2[0]->isMultiOutput()) { assert(workers1.size() == workers2.size()); } if (workers1[0]->isMultiInput()) { for(size_t i=0;iinit_input_blocking(m,c)) return -1; } } else { assert(workers1.size() == workers2.size()); for(size_t i=0;iinit_input_blocking(m,c)) return -1; workers2[i]->set_output_blocking(m,c); } } // ----------------------------------------------------------- } // wraparound if (workers1[0]->isFarm() || workers1[0]->isAll2All()) { error("A2A, nodes of the first set cannot be farm or all-to-all\n"); return -1; } if (workers2[0]->isFarm() || workers2[0]->isAll2All()) { error("A2A, nodes of the second set cannot be farm or all-to-all\n"); return -1; } if (workers1[0]->isPipe()) { // all other Workers must be pipe for(size_t i=1;iisPipe()) { error("A2A, workers of the first set are not homogeneous, all of them must be of the same kind of building-block (e.g., all pipelines)\n"); return -1; } } if (!workers1[0]->isMultiOutput()) { error("A2A, workers of the first set can be pipelines but only if they are multi-output (automatic transformation NOT YET SUPPORTED)\n"); return -1; } ff_node* last = ispipe_getlast(workers1[0]); // NOTE: we suppose homogeneous first set assert(last); if (last->isFarm() && !last->isOFarm()) { // standard farm ... if (!isfarm_withcollector(last)) { // ... 
with no collector svector w1; last->get_out_nodes(w1); if (!w1[0]->isMultiOutput()) { // NOTE: we suppose homogeneous workers error("A2A, workers (farm/ofarm) of the first set are pipelines but their last stage is not multi-output (automatic transformation NOT YET SUPPORTED)\n"); return -1; } } } // since by default the a2a is multi-output, we have to check if its workers // are multi-output if (last->isAll2All()) { svector w1; last->get_out_nodes(w1); if (!w1[0]->isMultiOutput()) { // NOTE: we suppose homogeneous second set error("A2A, workers (a2a) of the first set are pipelines but their last stage is not multi-output (automatic transformation NOT YET SUPPORTED)\n"); return -1; } } } if (workers2[0]->isPipe()) { // all other Workers must be pipe for(size_t i=1;iisPipe()) { error("A2A, workers of the second set are not homogeneous, all of them must be of the same kind (e.g., all pipelines)\n"); return -1; } } if (!workers2[0]->isMultiInput()) { error("A2A, workers of the second set can be pipelines but only if they are multi-input (automatic transformation NOT YET SUPPORTED)\n"); return -1; } // since by default the a2a is multi-input, we have to check if its workers // are multi-input svector w1; workers2[0]->get_in_nodes(w1); if (!w1[0]->isMultiInput()) { // NOTE: we suppose homogeneous second set error("A2A, workers of the second set are pipelines but their first stage is not multi-input (automatic transformation NOT YET SUPPORTED)\n"); return -1; } } // checking L-Workers if (!workers1[0]->isMultiOutput()) { // NOTE: we suppose all others to be the same // the nodes in the first set cannot be multi-input nodes without being // also multi-output if (workers1[0]->isMultiInput()) { // NOTE: we suppose all others to be the same error("A2A, the nodes of the first set cannot be multi-input nodes without being also multi-output (i.e., a composition of nodes). 
The node must be either standard node or multi-output node or compositions where the second stage is a multi-output node\n"); return -1; } // it is a standard node or a pipeline with a standard node as last stage, so we transform it to a multi-output node for(size_t i=0;iget_barrier(); if (bar) mo->set_barrier(bar); workers1[i] = mo; // replacing old node internalSupportNodes.push_back(mo); } } for(size_t i=0;iondemand_buffer()==0)) { svector w; workers1[i]->get_out_nodes(w); for(size_t k=0;kset_scheduling_ondemand(ondemand_chunk); //workers1[i]->set_scheduling_ondemand(ondemand_chunk); } workers1[i]->set_id(int(i)); } // checking R-Workers if (!workers2[0]->isMultiInput()) { // NOTE: we suppose that all others are the same if (workers2[0]->isMultiOutput()) { error("A2A, the nodes of the second set cannot be multi-output nodes without being also multi-input (i.e., a composition of nodes). The node must be either standard node or multi-input node or compositions where the first stage is a multi-input node\n"); return -1; } // here we have to transform the standard node into a multi-input node for(size_t i=0;iget_barrier(); if (bar) mi->set_barrier(bar); workers2[i] = mi; // replacing old node internalSupportNodes.push_back(mi); } } for(size_t i=0;iset_id(int(i)); } size_t nworkers1 = workers1.size(); size_t nworkers2 = workers2.size(); { int ondemand = workers1[0]->ondemand_buffer(); // NOTE: here we suppose that all nodes in workers1 are homogeneous! 
svector L; for(size_t i=0;iget_out_nodes(L); if (L.size()==0) L=workers1; svector R; for(size_t i=0;iget_in_nodes(R); if (R.size()==0) R=workers2; for(size_t i=0;iset_output(t); R[i]->set_input(t); } } } if (outputNodes.size()) { svector w; for(size_t i=0;iget_out_nodes(w); if (outputNodes.size() != w.size()) { error("A2A, prepare, invalid state detected\n"); return -1; } for(size_t i=0;iisMultiOutput()) { error("A2A, prepare, invalid state, unexpected multi-output node\n"); return -1; } assert(outputNodes[i]->get_in_buffer() != nullptr); if (w[i]->set_output_buffer(outputNodes[i]->get_in_buffer()) < 0) { error("A2A, prepare, invalid state, setting output buffer\n"); return -1; } } } // blocking stuff -------------------------------------------- pthread_mutex_t *m = NULL; pthread_cond_t *c = NULL; for(size_t i=0;iinit_input_blocking(m,c)) return -1; } for(size_t i=0;iinit_output_blocking(m,c)) return -1; } // ------------------------------------------------------------ prepared = true; return 0; } void *svc(void*) { return FF_EOS; } public: ff_a2a(bool notusedanymore=false, int in_buffer_entries=DEFAULT_BUFFER_CAPACITY, int out_buffer_entries=DEFAULT_BUFFER_CAPACITY, bool fixedsize=FF_FIXED_SIZE):prepared(false),fixedsizeIN(fixedsize),fixedsizeOUT(fixedsize), in_buffer_entries(in_buffer_entries), out_buffer_entries(out_buffer_entries) {} ff_a2a(const ff_a2a& p):prepared(false) { if (p.prepared) { error("ff_a2a, copy constructor, the input all-to-all has already been prepared\n"); return; } workers1 = p.workers1; workers2 = p.workers2; workers1_cleanup = p.workers1_cleanup; workers2_cleanup = p.workers2_cleanup; fixedsizeIN = p.fixedsizeIN; fixedsizeOUT = p.fixedsizeOUT; in_buffer_entries = p.in_buffer_entries; out_buffer_entries = p.out_buffer_entries; wraparound = p.wraparound; ondemand_chunk = p.ondemand_chunk; outputNodes = p.outputNodes; internalSupportNodes = p.internalSupportNodes; // this is a dirty part, we modify a const object..... 
ff_a2a* dirty= const_cast(&p); dirty->internalSupportNodes.resize(0); } virtual ~ff_a2a() { if (barrier) delete barrier; for(size_t i=0;i int add_firstset(const std::vector & w, int ondemand=0, bool cleanup=false) { if (workers1.size()>0) { error("A2A, add_firstset cannot be called multiple times\n"); return -1; } if (w.size()==0) { error("A2A, try to add zero workers to the first set!\n"); return -1; } for(size_t i=0;i int change_firstset(const std::vector& w, int ondemand=0, bool cleanup=false, bool remove_from_cleanuplist=false) { if (remove_from_cleanuplist) { for(size_t j=0;j=0) internalSupportNodes.erase(internalSupportNodes.begin()+pos); } } workers1.clear(); return add_firstset(w, ondemand, cleanup); } /** * The nodes of the second set must be either standard ff_node or a node that is multi-input. * */ template int add_secondset(const std::vector & w, bool cleanup=false) { if (workers2.size()>0) { error("A2A, add_secondset cannot be called multiple times\n"); return -1; } if (w.size()==0) { error("A2A, try to add zero workers to the second set!\n"); return -1; } for(size_t i=0;i int change_secondset(const std::vector& w, bool cleanup=false, bool remove_from_cleanuplist=false) { if (remove_from_cleanuplist) { for(size_t j=0;j=0) internalSupportNodes.erase(internalSupportNodes.begin()+pos); } } workers2.clear(); return add_secondset(w, cleanup); } bool change_node(ff_node* old, ff_node* n, bool cleanup=false, bool remove_from_cleanuplist=false) { if (prepared) { error("A2A, change_node cannot be called because the A2A has already been prepared\n"); return false; } for(size_t i=0; i=0) internalSupportNodes.erase(internalSupportNodes.begin()+pos); } workers1[i] = n; if (cleanup) internalSupportNodes.push_back(n); return true; } } for(size_t i=0; i=0) internalSupportNodes.erase(internalSupportNodes.begin()+pos); } workers2[i] = n; if (cleanup) internalSupportNodes.push_back(n); return true; } } return false; } void skipfirstpop(bool sk) { for(size_t 
i=0;iskipfirstpop(sk); skip1pop=sk; } #ifdef DFF_ENABLED void skipallpop(bool sk) { for(size_t i=0;iskipallpop(sk); ff_node::skipallpop(sk); } #endif void blocking_mode(bool blk=true) { blocking_in = blocking_out = blk; } void no_mapping() { default_mapping = false; } void no_barrier() { initial_barrier=false; } int cardinality() const { int card=0; for(size_t i=0;icardinality(); for(size_t i=0;icardinality(); return card; } int run(bool skip_init=false) { if (!skip_init) { #if defined(FF_INITIAL_BARRIER) if (initial_barrier) { // set the initial value for the barrier if (!barrier) barrier = new BARRIER_T; const int nthreads = cardinality(barrier); if (nthreads > MAX_NUM_THREADS) { error("PIPE, too much threads, increase MAX_NUM_THREADS !\n"); return -1; } barrier->barrierSetup(nthreads); } #endif skipfirstpop(true); } if (!prepared) if (prepare()<0) return -1; const size_t nworkers1 = workers1.size(); const size_t nworkers2 = workers2.size(); for(size_t i=0;iblocking_mode(blocking_in); if (!default_mapping) workers1[i]->no_mapping(); if (workers1[i]->run(true)<0) { error("ERROR: A2A, running worker (first set) %d\n", i); return -1; } } for(size_t i=0;iblocking_mode(blocking_in); if (!default_mapping) workers2[i]->no_mapping(); if (workers2[i]->run(true)<0) { error("ERROR: A2A, running worker (second set) %d\n", i); return -1; } } return 0; } int wait() { int ret=0; const size_t nworkers1 = workers1.size(); const size_t nworkers2 = workers2.size(); for(size_t i=0;iwait()<0) { error("A2A, waiting Worker1 thread, id = %d\n",workers1[i]->get_my_id()); ret = -1; } for(size_t i=0;iwait()<0) { error("A2A, waiting Worker2 thread, id = %d\n",workers2[i]->get_my_id()); ret = -1; } return ret; } #ifdef DFF_ENABLED int run_and_wait_end(); #else int run_and_wait_end() { if (isfrozen()) { // TODO error("A2A: Error: feature not yet supported\n"); return -1; } if (run()<0) return -1; if (wait()<0) return -1; return 0; } #endif /** * \brief checks if the node is running * */ bool 
done() const { const size_t nworkers1 = workers1.size(); const size_t nworkers2 = workers2.size(); for(size_t i=0;idone()) return false; for(size_t i=0;idone()) return false; return true; } void remove_from_cleanuplist(const svector& w) { for(size_t j=0;j=0) internalSupportNodes.erase(internalSupportNodes.begin()+pos); } } const svector& getFirstSet() const { return workers1; } const svector& getSecondSet() const { return workers2; } int ondemand_buffer() const { return ondemand_chunk; } int numThreads() const { return cardinality(); } int set_output(const svector & w) { outputNodes +=w; return 0; } int set_output(ff_node *node) { outputNodes.push_back(node); return 0; } void get_out_nodes(svector&w) { for(size_t i=0;iget_out_nodes(w); if (w.size() == 0) w += getSecondSet(); } void get_out_nodes_feedback(svector& w) { for(size_t i=0;iget_out_nodes_feedback(w); } void get_in_nodes(svector&w) { size_t len=w.size(); for(size_t i=0;iget_in_nodes(w); if (len == w.size()) w += getFirstSet(); } /** * \brief Feedback channel (pattern modifier) * * The last stage output stream will be connected to the first stage * input stream in a cycle (feedback channel) */ int wrap_around() { wraparound=true; return 0;} bool isset_wraparound() { return wraparound; } /* WARNING: if these methods are called after prepare (i.e. after having called * run_and_wait_end/run_then_freeze/run/....) they have no effect. 
* */ void setFixedSize(bool fs) { fixedsizeIN = fixedsizeOUT = fs; } void setInputQueueLength(int sz, bool fixedsize) { in_buffer_entries = sz; fixedsizeIN = fixedsize; } void setOutputQueueLength(int sz, bool fixedsize) { out_buffer_entries = sz; fixedsizeOUT = fixedsize; } // time functions -------------------------------- const struct timeval getstarttime() const { const struct timeval zero={0,0}; std::vector workertime(workers1.size(),zero); for(size_t i=0;igetstarttime(); std::vector::iterator it= std::max_element(workertime.begin(),workertime.end(),time_compare); return (*it); } const struct timeval getwstartime() const { const struct timeval zero={0,0}; std::vector workertime(workers1.size(),zero); for(size_t i=0;igetwstartime(); std::vector::iterator it= std::max_element(workertime.begin(),workertime.end(),time_compare); return (*it); } const struct timeval getstoptime() const { const struct timeval zero={0,0}; std::vector workertime(workers2.size(),zero); for(size_t i=0;igetstoptime(); std::vector::iterator it= std::max_element(workertime.begin(),workertime.end(),time_compare); return (*it); } const struct timeval getwstoptime() const { const struct timeval zero={0,0}; std::vector workertime(workers2.size(),zero); for(size_t i=0;igetwstoptime(); } std::vector::iterator it= std::max_element(workertime.begin(),workertime.end(),time_compare); return (*it); } double ffTime() { return diffmsec(getstoptime(),getstarttime()); } double ffwTime() { return diffmsec(getwstoptime(),getwstartime()); } #if defined(TRACE_FASTFLOW) void ffStats(std::ostream & out) { out << "--- a2a:\n"; out << "--- L-Workers:\n"; for(size_t i=0;iffStats(out); out << "--- R-Workers:\n"; for(size_t i=0;iffStats(out); } #else void ffStats(std::ostream & out) { out << "FastFlow trace not enabled\n"; } #endif #ifdef DFF_ENABLED virtual bool isSerializable(){ svector outputs; this->get_out_nodes(outputs); for(ff_node* output: outputs) if (!output->isSerializable()) return false; return true; } 
virtual bool isDeserializable(){ svector inputs; this->get_in_nodes(inputs); for(ff_node* input: inputs) if(!input->isDeserializable()) return false; return true; } #endif protected: bool isMultiInput() const { return true;} bool isMultiOutput() const { return true;} bool isAll2All() const { return true; } int create_input_buffer(int nentries, bool fixedsize=FF_FIXED_SIZE) { size_t nworkers1 = workers1.size(); for(size_t i=0;icreate_input_buffer(nentries,fixedsize)==-1) return -1; return 0; } int create_output_buffer(int nentries, bool fixedsize=FF_FIXED_SIZE) { int id=0; size_t nworkers2 = workers2.size(); for(size_t i=0;iisMultiOutput()) { svector w(1); workers2[i]->get_out_nodes(w); assert(w.size()); for(size_t j=0;jset_id(id++); internalSupportNodes.push_back(t); if (w[j]->isMultiOutput()) { if (w[j]->set_output(t)<0) return -1; } else { if (workers2[i]->set_output(t)<0) return -1; } } } else{ if (workers2[i]->create_output_buffer(nentries,fixedsize)==-1) return -1; id++; } } return 0; } bool init_input_blocking(pthread_mutex_t *&m, pthread_cond_t *&c, bool /*feedback*/=true) { size_t nworkers1 = workers1.size(); for(size_t i=0;iinit_input_blocking(m1,c1)) return false; if (nworkers1==1) { m=m1; c=c1; } } return true; } bool init_output_blocking(pthread_mutex_t *&, pthread_cond_t *&, bool /*feedback*/=true) { size_t nworkers2 = workers2.size(); for(size_t i=0;iinit_output_blocking(m,c)) return false; } return true; } void set_output_blocking(pthread_mutex_t *&m, pthread_cond_t *&c, bool canoverwrite=false) { size_t nworkers2 = workers2.size(); for(size_t i=0;iset_output_blocking(m,c, canoverwrite); } } protected: bool workers1_cleanup=false; bool workers2_cleanup=false; bool prepared, fixedsizeIN, fixedsizeOUT; bool wraparound=false; int in_buffer_entries, out_buffer_entries; int ondemand_chunk=0; svector workers1; // first set, nodes must be multi-output svector workers2; // second set, nodes must be multi-input svector outputNodes; svector 
internalSupportNodes; }; } // namespace ff #endif /* FF_A2A_HPP */