/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */

/*!
 * \link
 * \file optimize.hpp
 * \ingroup building_blocks
 *
 * \brief FastFlow optimization heuristics
 *
 * @detail Static optimization heuristics applied to compositions of FastFlow
 * building blocks (pipeline, farm, all-to-all).
 *
 */

#ifndef FF_OPTIMIZE_HPP
#define FF_OPTIMIZE_HPP

/* ***************************************************************************
 *
 *  FastFlow is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License version 3 as
 *  published by the Free Software Foundation.
 *  Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
 *  or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 *  License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 ****************************************************************************
 */

/*
 * Author: Massimo Torquati
 *
 */

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdarg>
#include <cassert>
#include <ff/node.hpp>
#include <ff/pipeline.hpp>
#include <ff/farm.hpp>
#include <ff/all2all.hpp>
#include <ff/combine.hpp>

namespace ff {

// forward declarations (the functions defined in this file are mutually recursive)
static inline int optimize_static(ff_farm& farm, const OptLevel& opt);
static inline int optimize_static(ff_pipeline& pipe, const OptLevel& opt);
template<typename T>
static inline int combine_with_laststage(ff_pipeline& pipe, T* node, bool cleanup_node);
template<typename T>
static inline int combine_right_with_a2a(ff_a2a& a2a, T* node, bool cleanup_node);
template<typename T>
static inline int combine_left_with_a2a(ff_a2a& a2a, T* node, bool cleanup_node);

typedef enum { OPT_NORMAL = 1, OPT_INFO = 2 } reportkind_t;

static inline void opt_report(int verbose_level, reportkind_t kind, const char* str, ...) {
    if (verbose_level < kind) return;
    va_list argp;
    char* p = (char*)malloc(strlen(str)+512); // this is dangerous.....
    assert(p);
    strcpy(p, str);
    va_start(argp, str);
    vfprintf(stdout, p, argp);
    va_end(argp);
    free(p);
}

/**
 * This function looks for internal farms with a default collector inside a farm
 * building block. The internal default collectors are removed.
 */
static inline int remove_internal_collectors(ff_farm& farm) {
    const svector<ff_node*>& W = farm.getWorkers();
    for(size_t i=0;i<W.size();++i) {
        if (W[i]->isFarm() && !W[i]->isOFarm()) {
            ff_farm* ifarm = reinterpret_cast<ff_farm*>(W[i]);
            if (remove_internal_collectors(*ifarm)<0) return -1;
            if (ifarm->getCollector() == nullptr) ifarm->remove_collector();
        } else {
            if (W[i]->isPipe()) {
                ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W[i]);
                OptLevel iopt;
                iopt.remove_collector=true;
                if (optimize_static(*ipipe, iopt)<0) return -1;
            }
            if (W[i]->isAll2All()) {
                ff_a2a* a2a = reinterpret_cast<ff_a2a*>(W[i]);
                const svector<ff_node*>& W1 = a2a->getFirstSet();
                const svector<ff_node*>& W2 = a2a->getSecondSet();
                for(size_t j=0;j<W1.size();++j) {
                    if (W1[j]->isPipe()) {
                        ff_pipeline* ipipe=reinterpret_cast<ff_pipeline*>(W1[j]);
                        OptLevel iopt;
                        iopt.remove_collector=true;
                        if (optimize_static(*ipipe, iopt)<0) return -1;
                    }
                }
                for(size_t j=0;j<W2.size();++j) {
                    if (W2[j]->isPipe()) {
                        ff_pipeline* ipipe=reinterpret_cast<ff_pipeline*>(W2[j]);
                        OptLevel iopt;
                        iopt.remove_collector=true;
                        if (optimize_static(*ipipe, iopt)<0) return -1;
                    }
                }
            }
        }
    }
    return 0;
}
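/*
 * Usage sketch (hypothetical Worker type, not defined in this file): removing
 * the default collector of a farm nested inside another farm so that the inner
 * workers connect directly to the enclosing stream network.
 *
 *   struct Worker: ff_node_t<long> {
 *       long* svc(long* t) { return t; }
 *   };
 *   ff_farm* inner = new ff_farm;
 *   inner->add_workers({new Worker, new Worker});   // no user collector -> default one
 *   ff_farm outer;
 *   outer.add_workers({inner});
 *   remove_internal_collectors(outer);              // drops inner's default collector
 */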
/**
 * It combines the node passed as second parameter with the farm's emitter.
 * The node is added at the left-hand side of the emitter.
 * This transformation is logically equivalent to the following pipeline:
 *     ff_Pipe<> pipe(node, farm);
 */
static inline int combine_with_emitter(ff_farm& farm, ff_node* node, bool cleanup_node=false) {
    if (node->isFarm() || node->isPipe() || node->isAll2All()) {
        error("combine_with_emitter: the node to combine cannot be a parallel building block\n");
        return -1;
    }
    ff_node* emitter = farm.getEmitter();
    if (!emitter) {
        farm.add_emitter(node);
        farm.cleanup_emitter(cleanup_node);
        return 0;
    }
    ff_comb* comb;
    if (!emitter->isMultiOutput()) {
        internal_mo_transformer* mo_emitter = new internal_mo_transformer(emitter, farm.isset_cleanup_emitter());
        comb = new ff_comb(node, mo_emitter, cleanup_node, true);
    } else {
        comb = new ff_comb(node, emitter, cleanup_node, farm.isset_cleanup_emitter());
    }
    if (farm.isset_cleanup_emitter()) farm.cleanup_emitter(false);
    farm.change_emitter(comb, true);
    return 0;
}

/*
 * It combines the node passed as parameter with the farm's collector.
 * The node is added at the right-hand side of the collector.
 * This transformation is logically equivalent to the following pipeline:
 *     ff_Pipe<> pipe(farm, node);
 */
static inline int combine_with_collector(ff_farm& farm, ff_node* node, bool cleanup_node=false) {
    if (!farm.getCollector()) {
        error("combine_with_collector: the farm passed as parameter does not have a collector\n");
        return -1;
    }
    if (node->isFarm() || node->isPipe() || node->isAll2All()) {
        error("combine_with_collector: the node to combine cannot be a parallel building block\n");
        return -1;
    }
    ff_node* collector = farm.getCollector();
    ff_comb* comb = new ff_comb(collector, node, farm.isset_cleanup_collector(), cleanup_node);
    if (farm.isset_cleanup_collector()) farm.cleanup_collector(false);
    farm.remove_collector();
    farm.add_collector(comb, true);
    return 0;
}
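/*
 * Usage sketch (hypothetical Worker and Filter types, assumed to be plain
 * sequential stages): prepending Filter to the farm is logically equivalent to
 * ff_Pipe<> pipe(filter, farm), but Filter is executed by the emitter thread.
 * Note that combine_with_collector additionally requires a user-defined
 * collector to be present in the farm.
 *
 *   struct Worker: ff_node_t<long> { long* svc(long* t) { return t; } };
 *   struct Filter: ff_node_t<long> { long* svc(long* t) { return t; } };
 *   ff_farm farm;
 *   farm.add_workers({new Worker, new Worker});
 *   combine_with_emitter(farm, new Filter, true);  // Filter becomes (part of) the emitter
 */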
/*
 * It combines the node passed as parameter with the first stage of the pipeline.
 * The node is added at the left-hand side of the first pipeline node.
 * This transformation is logically equivalent to the following pipeline:
 *     ff_Pipe<> pipe2(node, pipe);
 */
template<typename T>
static inline int combine_with_firststage(ff_pipeline& pipe, T* node, bool cleanup_node) {
    pipe.flatten();
    ff_node* node0 = pipe.get_node(0); // it cannot be a pipeline
    if (!node0) {
        error("combine_with_firststage: empty pipeline\n");
        return -1;
    }
    if (node0->isAll2All()) {
        return combine_left_with_a2a(*reinterpret_cast<ff_a2a*>(node0), node, cleanup_node);
    }
    if (node0->isFarm()) {
        ff_farm& farm = *(ff_farm*)node0;
        if (combine_with_emitter(farm, node, cleanup_node)<0) return -1;
        pipe.remove_stage(0);
        pipe.insert_stage(0, node0);
    } else {
        ff_comb* comb = new ff_comb(node, node0, cleanup_node);
        pipe.remove_stage(0);
        pipe.insert_stage(0, comb, true);
    }
    return 0;
}

template<typename T>
static inline int combine_right_with_farm(ff_farm& farm, T* node, bool cleanup_node) {
    if (farm.hasCollector())
        return combine_with_collector(farm, node, cleanup_node);

    // farm with no collector
    const svector<ff_node*>& w = farm.getWorkers();
    assert(w.size()>0);
    if (w[0]->isPipe()) { // NOTE: we suppose that all workers are homogeneous
        if (combine_with_laststage(*reinterpret_cast<ff_pipeline*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            ff_pipeline* pipe = reinterpret_cast<ff_pipeline*>(w[i]);
            r += combine_with_laststage(*pipe, new T(*node), true);
        }
        return (r>0?-1:0);
    }
    if (w[0]->isFarm()) {
        if (combine_right_with_farm(*reinterpret_cast<ff_farm*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            ff_farm* ifarm = reinterpret_cast<ff_farm*>(w[i]);
            r += combine_right_with_farm(*ifarm, new T(*node), true);
        }
        return (r>0?-1:0);
    }
    if (w[0]->isAll2All()) {
        if (combine_right_with_a2a(*reinterpret_cast<ff_a2a*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            ff_a2a* a2a = reinterpret_cast<ff_a2a*>(w[i]);
            r += combine_right_with_a2a(*a2a, new T(*node), true);
        }
        return (r>0?-1:0);
    }
    // standard workers: each worker is fused with its own copy of the node
    bool workers_cleanup = farm.isset_cleanup_workers();
    std::vector<ff_node*> new_workers;
    ff_comb* comb = new ff_comb(w[0], node, workers_cleanup, cleanup_node);
    assert(comb);
    new_workers.push_back(comb);
    for(size_t i=1;i<w.size();++i) {
        ff_comb* c = new ff_comb(w[i], new T(*node), workers_cleanup, true);
        assert(c);
        new_workers.push_back(c);
    }
    farm.change_workers(new_workers);
    farm.cleanup_workers();
    return 0;
}

template<typename T>
static inline int combine_right_with_a2a(ff_a2a& a2a, T* node, bool cleanup_node) {
    const svector<ff_node*>& w = a2a.getSecondSet();
    if (w[0]->isPipe()) { // NOTE: we suppose that all workers are homogeneous
        if (combine_with_laststage(*reinterpret_cast<ff_pipeline*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            assert(w[i]->isPipe());
            ff_pipeline* pipe = reinterpret_cast<ff_pipeline*>(w[i]);
            r += combine_with_laststage(*pipe, new T(*node), true);
        }
        return (r>0?-1:0);
    }
    std::vector<ff_node*> new_secondset;
    ff_comb* comb = new ff_comb(w[0], node, false, cleanup_node);
    assert(comb);
    new_secondset.push_back(comb);
    for(size_t i=1;i<w.size();++i) {
        ff_comb* c = new ff_comb(w[i], new T(*node), false, true);
        assert(c);
        new_secondset.push_back(c);
    }
    // replace the second set with the composed workers
    a2a.change_secondset(new_secondset, true);
    return 0;
}

template<typename T>
static inline int combine_left_with_a2a(ff_a2a& a2a, T* node, bool cleanup_node) {
    const svector<ff_node*>& w = a2a.getFirstSet();
    if (w[0]->isPipe()) { // NOTE: we suppose that all workers are homogeneous
        if (combine_with_firststage(*reinterpret_cast<ff_pipeline*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            assert(w[i]->isPipe());
            ff_pipeline* pipe = reinterpret_cast<ff_pipeline*>(w[i]);
            r += combine_with_firststage(*pipe, new T(*node), true);
        }
        return (r>0?-1:0);
    }
    std::vector<ff_node*> new_firstset;
    ff_comb* comb = new ff_comb(node, w[0], cleanup_node, false);
    assert(comb);
    new_firstset.push_back(comb);
    for(size_t i=1;i<w.size();++i) {
        ff_comb* c = new ff_comb(new T(*node), w[i], true, false);
        assert(c);
        new_firstset.push_back(c);
    }
    // replace the first set with the composed workers
    a2a.change_firstset(new_firstset, 0, true);
    return 0;
}
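/*
 * Usage sketch (hypothetical Source and Stage types): prepending a node to
 * whatever the first stage of a pipeline happens to be (sequential node, farm
 * emitter, or first set of an all-to-all); logically equivalent to
 * ff_Pipe<> pipe2(source, pipe).
 *
 *   struct Source: ff_node_t<long> {
 *       long* svc(long*) {
 *           for(long i=1;i<=100;++i) ff_send_out((long*)i);
 *           return EOS;
 *       }
 *   };
 *   struct Stage: ff_node_t<long> { long* svc(long* t) { return t; } };
 *   ff_pipeline pipe;
 *   pipe.add_stage(new Stage);
 *   pipe.add_stage(new Stage);
 *   combine_with_firststage(pipe, new Source, true); // Source fused with the first Stage
 */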
/*
 * It combines the node passed as parameter with the last stage of the pipeline.
 * The node is added at the right-hand side of the last pipeline node.
 * This transformation is logically equivalent to the following pipeline:
 *     ff_Pipe<> pipe2(pipe, node);
 */
template<typename T>
static inline int combine_with_laststage(ff_pipeline& pipe, T* node, bool cleanup_node) {
    pipe.flatten();
    ff_node* last = pipe.get_lastnode(); // it cannot be a pipeline
    if (!last) {
        error("combine_with_laststage: empty pipeline\n");
        return -1;
    }
    if (last->isAll2All()) {
        return combine_right_with_a2a(*reinterpret_cast<ff_a2a*>(last), node, cleanup_node);
    }
    if (last->isFarm()) {
        return combine_right_with_farm(*reinterpret_cast<ff_farm*>(last), node, cleanup_node);
    }
    bool node_cleanup = pipe.isset_cleanup_nodes();
    int nstages = static_cast<int>(pipe.nodes_list.size());
    ff_comb* comb = new ff_comb(last, node, node_cleanup, cleanup_node);
    pipe.remove_stage(nstages-1);
    pipe.insert_stage((nstages-1)>0 ? (nstages-1) : 0, comb, true);
    return 0;
}

/* This is farm specific.
 * - It basically sets the threshold for enabling blocking mode.
 * - It can remove the collector of internal farms in a farm-of-farms composition.
 * - TODO: Farm of farms ---> single farm with two emitters combined (external+internal)
 */
static inline int optimize_static(ff_farm& farm, const OptLevel& opt=OptLevel1()) {
    if (farm.prepared) {
        error("optimize_static (farm) called after prepare\n");
        return -1;
    }
    // optimizing internal pipelines, if any
    OptLevel iopt(opt);
    iopt.blocking_mode      = false;
    iopt.no_initial_barrier = false;
    iopt.no_default_mapping = false;
    const svector<ff_node*>& Workers = farm.getWorkers();
    for(size_t i=0;i<Workers.size();++i) {
        if (Workers[i]->isPipe()) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (farm): Looking for optimizations in the internal pipeline %ld\n", i);
            ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(Workers[i]);
            if (optimize_static(*ipipe, iopt)) return -1;
        }
    }
    // here it looks for internal farms with null collectors
    if (opt.remove_collector) {
        auto optimize_internal_farm = [opt](ff_farm& ifarm) {
            OptLevel iopt;
            iopt.remove_collector = true;
            iopt.verbose_level    = opt.verbose_level;
            if (optimize_static(ifarm, iopt)<0) return -1;
            if (ifarm.getCollector() == nullptr) {
                opt_report(opt.verbose_level, OPT_NORMAL,
                           "OPT (farm): REMOVE_COLLECTOR: Removed farm collector\n");
                ifarm.remove_collector();
            }
            return 0;
        };
        const svector<ff_node*>& W = farm.getWorkers();
        for(size_t i=0;i<W.size();++i) {
            if (W[i]->isFarm() && !W[i]->isOFarm()) {
                ff_farm* ifarm = reinterpret_cast<ff_farm*>(W[i]);
                opt_report(opt.verbose_level, OPT_NORMAL,
                           "OPT (farm): Looking for optimizations in the internal farm %ld\n", i);
                if (optimize_internal_farm(*ifarm)<0) return -1;
            } else {
                if (W[i]->isPipe()) {
                    ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W[i]);
                    OptLevel iopt;
                    iopt.remove_collector = true;
                    iopt.verbose_level    = opt.verbose_level;
                    if (optimize_static(*ipipe, iopt)<0) return -1;
#if 0
                    ff_node* last = ipipe->get_lastnode();
                    if (last->isFarm() && !last->isOFarm()) {
                        ff_farm* ifarm = reinterpret_cast<ff_farm*>(last);
                        opt_report(opt.verbose_level, OPT_NORMAL,
                                   "OPT (farm): Looking for optimizations in the internal last farm of a pipeline %ld\n", i);
                        if (optimize_internal_farm(*ifarm)<0) return -1;
                    }
#endif
                }
                if (W[i]->isAll2All()) {
                    ff_a2a* a2a = reinterpret_cast<ff_a2a*>(W[i]);
                    const svector<ff_node*>& W1 = a2a->getFirstSet();
                    const svector<ff_node*>& W2 = a2a->getSecondSet();
                    for(size_t j=0;j<W1.size();++j) {
                        if (W1[j]->isPipe()) {
                            ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W1[j]);
                            OptLevel iopt;
                            iopt.remove_collector = true;
                            iopt.verbose_level    = opt.verbose_level;
                            if (optimize_static(*ipipe, iopt)<0) return -1;
                        }
                    }
                    for(size_t j=0;j<W2.size();++j) {
                        if (W2[j]->isPipe()) {
                            ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W2[j]);
                            OptLevel iopt;
                            iopt.remove_collector = true;
                            iopt.verbose_level    = opt.verbose_level;
                            if (optimize_static(*ipipe, iopt)<0) return -1;
                        }
                    }
                }
            }
        }
    }
    // switching to blocking mode if the n. of threads is greater than the threshold
    if (opt.blocking_mode) {
        ssize_t card = farm.cardinality();
        if (opt.max_nb_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (farm): BLOCKING_MODE: Activating blocking mode, threshold=%ld, number of threads=%ld\n",
                       opt.max_nb_threads, card);
            farm.blocking_mode(true);
        }
    }
    // turning off initial/default mapping if the n. of threads is greater than the threshold
    if (opt.no_default_mapping) {
        ssize_t card = farm.cardinality();
        if (opt.max_mapped_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (farm): MAPPING: Disabling mapping, threshold=%ld, number of threads=%ld\n",
                       opt.max_mapped_threads, card);
            farm.no_mapping();
        }
    }
    // no initial barrier
    if (opt.no_initial_barrier) {
        opt_report(opt.verbose_level, OPT_NORMAL,
                   "OPT (farm): NO_INITIAL_BARRIER: Initial barrier disabled\n");
        farm.no_barrier();
    }
    return 0;
}
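/*
 * Usage sketch (hypothetical Worker type): the OptLevel fields shown are the
 * ones inspected by the function above; the threshold value is arbitrary.
 *
 *   struct Worker: ff_node_t<long> { long* svc(long* t) { return t; } };
 *   ff_farm farm;
 *   farm.add_workers({new Worker, new Worker, new Worker});
 *   OptLevel opt;
 *   opt.remove_collector = true;  // remove default collectors of nested farms
 *   opt.blocking_mode    = true;  // blocking mode if cardinality exceeds max_nb_threads
 *   opt.max_nb_threads   = 16;
 *   opt.verbose_level    = 1;     // report the applied optimizations on stdout
 *   if (optimize_static(farm, opt) < 0) error("optimize_static failed\n");
 */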
/*
 * This is pipeline specific. It recursively applies the optimizations to the
 * nested building blocks and then applies the requested pipeline-level
 * transformations.
 */
static inline int optimize_static(ff_pipeline& pipe, const OptLevel& opt=OptLevel1()) {
    if (pipe.prepared) {
        error("optimize_static (pipeline) called after prepare\n");
        return -1;
    }
    // flattening the pipeline
    pipe.flatten();
    int nstages = static_cast<int>(pipe.nodes_list.size());

    // looking for farm and all-to-all stages because they might have pipelines
    // inside; for each nested pipeline the optimize_static function is called
    // recursively following a depth-first search
    OptLevel iopt(opt);
    iopt.blocking_mode      = false;
    iopt.no_initial_barrier = false;
    iopt.no_default_mapping = false;
    for(int i=0;i<nstages;++i) {
        if (pipe.nodes_list[i]->isFarm()) {
            ff_farm* farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[i]);
            const svector<ff_node*>& W = farm->getWorkers();
            for(size_t j=0;j<W.size();++j) {
                if (W[j]->isPipe()) {
                    ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W[j]);
                    if (optimize_static(*ipipe, iopt)) return -1;
                }
            }
        } else if (pipe.nodes_list[i]->isAll2All()) {
            ff_a2a* a2a = reinterpret_cast<ff_a2a*>(pipe.nodes_list[i]);
            const svector<ff_node*>& W1 = a2a->getFirstSet();
            const svector<ff_node*>& W2 = a2a->getSecondSet();
            for(size_t j=0;j<W1.size();++j) {
                if (W1[j]->isPipe()) {
                    ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W1[j]);
                    if (optimize_static(*ipipe, iopt)) return -1;
                }
            }
            for(size_t j=0;j<W2.size();++j) {
                if (W2[j]->isPipe()) {
                    ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W2[j]);
                    if (optimize_static(*ipipe, iopt)) return -1;
                }
            }
        }
    }

    // ------------------ helping functions ----------------------
    auto find_farm_with_null_collector = [](const svector<ff_node*>& nodeslist, int start=0)->int {
        for(int i=start;i<(int)(nodeslist.size());++i) {
            if (nodeslist[i]->isFarm()) {
                ff_farm* farm = reinterpret_cast<ff_farm*>(nodeslist[i]);
                if (farm->getCollector() == nullptr) return i;
            }
        }
        return -1;
    };
    auto find_farm_with_null_emitter = [](const svector<ff_node*>& nodeslist, int start=0)->int {
        for(int i=start;i<(int)(nodeslist.size());++i) {
            if (nodeslist[i]->isFarm() &&
                (nullptr == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getEmitter()))
                return i;
        }
        return -1;
    };
    // looking for the longest sequence of farms (or ofarms) with the same number of workers
    auto farm_sequence = [&](const svector<ff_node*>& nodeslist, int& first_farm, int& last_farm) {
        bool ofarm = nodeslist[first_farm]->isOFarm();
        size_t nworkers = (reinterpret_cast<ff_farm*>(nodeslist[first_farm]))->getNWorkers();
        int starting_point = first_farm+1;
        int first = first_farm, last = last_farm;
        while (starting_point < static_cast<int>(nodeslist.size())) {
            bool ok = true;
            int next = find_farm_with_null_emitter(nodeslist, starting_point);
            if (next == -1) break;
            else {
                for(int i=starting_point; i<=next;) {
                    if ((ofarm ? nodeslist[i]->isOFarm() : !nodeslist[i]->isOFarm()) &&
                        (nworkers == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getNWorkers()) &&
                        (nullptr  == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getCollector()) &&
                        (nullptr  == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getEmitter()))
                        ++i;
                    else { ok = false; break; }
                }
                if (ok)
                    if ((reinterpret_cast<ff_farm*>(nodeslist[next]))->getEmitter() != nullptr) ok = false;
            }
            if (ok) {
                last = next;
                starting_point = next+1;
            } else {
                if (last == -1) {
                    first = find_farm_with_null_collector(nodeslist, first+1);
                    if (first == -1) break;
                    starting_point = first_farm+1;
                }
                break;
            }
        }
        if (first != -1 && last != -1) {
            first_farm = first;
            last_farm  = last;
        }
    };
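    // Schematic view of the fusion performed by combine_farm_sequence below
    // (hypothetical worker names): given consecutive farms with the same number
    // of workers and only default emitters/collectors in between,
    //
    //     pipe( farm(W1,W2), farm(Z1,Z2) )  --->  farm( comb(W1,Z1), comb(W2,Z2) )
    //
    // i.e. the i-th workers of the farms in the sequence are fused into a single
    // composed worker of one merged farm (the farm "normal form").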
    // introduces the normal-form of a sequence of farms (or ofarms)
    auto combine_farm_sequence = [](const svector<ff_node*>& nodeslist, int first_farm, int last_farm) {
        svector<svector<ff_node*> > W(16);
        W.resize(last_farm-first_farm+1);
        for(int i=first_farm, j=0; i<=last_farm; ++i,++j) {
            W[j] = reinterpret_cast<ff_farm*>(nodeslist[i])->getWorkers();
        }
        size_t nfarms   = W.size();
        size_t nworkers = W[0].size();
        std::vector<ff_node*> Workers(nworkers);
        // fuse the j-th workers of all the farms in the sequence into a single composed worker
        for(size_t j=0; j<nworkers; ++j) {
            ff_node* p = W[0][j];
            for(size_t i=1; i<nfarms; ++i)
                p = new ff_comb(p, W[i][j]);
            Workers[j] = reinterpret_cast<ff_node*>(p);
        }
        ff_farm* firstfarm = reinterpret_cast<ff_farm*>(nodeslist[first_farm]);
        ff_farm* lastfarm  = reinterpret_cast<ff_farm*>(nodeslist[last_farm]);
        ff_farm* newfarm   = new ff_farm;
        if (firstfarm->isOFarm()) {
            assert(lastfarm->isOFarm());
            newfarm->set_ordered();
        }
        if (firstfarm->getEmitter())  newfarm->add_emitter(firstfarm->getEmitter());
        if (lastfarm->hasCollector()) newfarm->add_collector(lastfarm->getCollector());
        newfarm->add_workers(Workers);
        newfarm->cleanup_workers();
        newfarm->set_scheduling_ondemand(firstfarm->ondemand_buffer());
        return newfarm;
    };
    // ---------------- end helping functions --------------------

    if (opt.merge_farms) {
        // find the first farm with default collector or with no collector
        int first_farm = find_farm_with_null_collector(pipe.nodes_list);
        if (first_farm != -1) {
            do {
                int last_farm = -1;
                farm_sequence(pipe.nodes_list, first_farm, last_farm);
                if (first_farm<last_farm) { // found a sequence of at least two farms
                    opt_report(opt.verbose_level, OPT_NORMAL,
                               "OPT (pipe): MERGE_FARMS: Merging farm stages %d-%d into a single farm\n",
                               first_farm, last_farm);
                    ff_farm* newfarm = combine_farm_sequence(pipe.nodes_list, first_farm, last_farm);
                    assert(newfarm);
                    for(int i=first_farm; i<=last_farm; ++i)
                        pipe.remove_stage(first_farm);
                    pipe.insert_stage(first_farm, newfarm, true);
                }
                first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+1);
            } while(first_farm != -1 && first_farm<static_cast<int>(pipe.nodes_list.size()));
        }
    }
    if (opt.introduce_a2a) {
        int first_farm = find_farm_with_null_collector(pipe.nodes_list);
        while(first_farm != -1 && (first_farm < static_cast<int>(pipe.nodes_list.size()-1))) {
            if (!pipe.nodes_list[first_farm]->isOFarm()) {
                if (pipe.nodes_list[first_farm+1]->isFarm() && !pipe.nodes_list[first_farm+1]->isOFarm()) {
                    ff_farm* farm1 = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                    ff_farm* farm2 = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm+1]);
                    if (farm2->getEmitter() == nullptr) {
                        opt_report(opt.verbose_level, OPT_NORMAL,
                                   "OPT (pipe): INTRODUCE_A2A: Introducing all-to-all between %d and %d stages\n",
                                   first_farm, first_farm+1);
                        const ff_farm f = combine_farms_a2a(*farm1, *farm2);
                        ff_farm* newfarm = new ff_farm(f);
                        assert(newfarm);
                        pipe.remove_stage(first_farm);
                        pipe.remove_stage(first_farm);
                        pipe.insert_stage(first_farm, newfarm, true);
                        first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+1);
                    }
                } else {
                    if (pipe.nodes_list[first_farm+1]->isOFarm())
                        opt_report(opt.verbose_level, OPT_INFO,
                                   "OPT (pipe): INTRODUCE_A2A: cannot introduce A2A because node %d is an ordered farm\n",
                                   first_farm+1);
                    first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+2);
                }
            } else {
                opt_report(opt.verbose_level, OPT_INFO,
                           "OPT (pipe): INTRODUCE_A2A: cannot introduce A2A because node %d is an ordered farm\n",
                           first_farm);
                first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+1);
            }
        }
    }
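    // Schematic view of the INTRODUCE_A2A rewrite applied above (hypothetical
    // worker names): two consecutive non-ordered farms, the first one without a
    // user-defined collector and the second one without a user-defined emitter,
    //
    //     pipe( farm(W1..Wn), farm(Z1..Zm) )  --->  pipe( farm( A2A(W1..Wn | Z1..Zm) ) )
    //
    // i.e. the two farms collapse into a single farm whose worker is an
    // all-to-all connecting the first set of workers to the second one
    // (this is what combine_farms_a2a builds).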
    if (opt.remove_collector) {
        // first, for all farms in the pipeline we try to optimize the farms' workers
        OptLevel farmopt;
        farmopt.remove_collector = true;
        farmopt.verbose_level    = opt.verbose_level;
        for(size_t i=0;i<pipe.nodes_list.size();++i) {
            if (pipe.nodes_list[i]->isFarm()) {
                ff_farm* farmnode = reinterpret_cast<ff_farm*>(pipe.nodes_list[i]);
                if (optimize_static(*farmnode, farmopt)<0) {
                    error("optimize_static, trying to optimize the farm at stage %ld\n", i);
                    return -1;
                }
            }
        }
        int first_farm, next=0;
        while((first_farm=find_farm_with_null_collector(pipe.nodes_list, next)) != -1) {
            if (first_farm < static_cast<int>(pipe.nodes_list.size()-1)) {
                // TODO: if the next stage is an A2A it would be nice to have a rule
                //       that attaches the farm workers to the first set of nodes
                ff_farm* farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                if (farm->hasCollector()) {
                    if (farm->isOFarm()) {
                        if ((!pipe.nodes_list[first_farm+1]->isAll2All()) &&
                            (!pipe.nodes_list[first_farm+1]->isFarm())) {
                            farm->add_collector(pipe.nodes_list[first_farm+1]);
                            pipe.remove_stage(first_farm+1);
                            opt_report(opt.verbose_level, OPT_NORMAL,
                                       "OPT (pipe): REMOVE_COLLECTOR: Merged next stage with ordered-farm collector\n");
                        }
                    } else {
                        if (!pipe.nodes_list[first_farm+1]->isAll2All()) {
                            farm->remove_collector();
                            opt_report(opt.verbose_level, OPT_NORMAL,
                                       "OPT (pipe): REMOVE_COLLECTOR: Removed farm collector\n");
                            if (!pipe.nodes_list[first_farm+1]->isMultiInput()) {
                                // the next stage is a standard node
                                ff_node* next_stage = pipe.nodes_list[first_farm+1];
                                pipe.remove_stage(first_farm+1);
                                ff_minode* mi = new internal_mi_transformer(next_stage);
                                assert(mi);
                                pipe.insert_stage(first_farm+1, mi, true);
                                opt_report(opt.verbose_level, OPT_NORMAL,
                                           "OPT (pipe): REMOVE_COLLECTOR: Transformed next stage to multi-input node\n");
                            }
                        }
                    }
                }
            } else { // this is the last stage (or the only stage)
                ff_farm* farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                if (!farm->isOFarm()) {
                    if (farm->hasCollector()) {
                        farm->remove_collector();
                        opt_report(opt.verbose_level, OPT_NORMAL,
                                   "OPT (pipe): REMOVE_COLLECTOR: Removed farm collector\n");
                    }
                }
            }
            next = first_farm+1;
        }
    }
    if (opt.merge_with_emitter) {
        int first_farm = find_farm_with_null_emitter(pipe.nodes_list);
        if (first_farm != -1) {
            if (first_farm>0) { // it is not the first one
                bool prev_single_standard = (!pipe.nodes_list[first_farm-1]->isMultiOutput());
                if (prev_single_standard) {
                    // could be a farm with a collector
                    if (pipe.nodes_list[first_farm-1]->isFarm()) {
                        ff_farm* farm_prev = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm-1]);
                        if (farm_prev->hasCollector()) {
                            ff_node* collector = farm_prev->getCollector();
                            if (collector->isMultiInput() && !collector->isComp()) {
                                error("MERGING MULTI-INPUT COLLECTOR TO THE FARM EMITTER NOT YET SUPPORTED\n");
                                abort(); // TODO: we have to create a multi-input comp to add to the emitter
                            }
                            opt_report(opt.verbose_level, OPT_NORMAL,
                                       "OPT (pipe): MERGE_WITH_EMITTER: Merged previous stage with farm emitter\n");
                            ff_farm* farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                            farm->add_emitter(collector);
                            farm_prev->remove_collector();
                        }
                    } else {
                        ff_node* node = pipe.nodes_list[first_farm-1];
                        if (node->isMultiInput() && !node->isComp()) {
                            error("MERGING MULTI-INPUT NODE TO THE FARM EMITTER NOT YET SUPPORTED\n");
                            // TODO: we have to create a multi-input comp to add to the emitter
                            abort();
                        }
                        ff_farm* farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                        if (pipe.nodes_list[first_farm]->isOFarm()) {
                            opt_report(opt.verbose_level, OPT_NORMAL,
                                       "OPT (pipe): MERGE_WITH_EMITTER: Merged previous stage with ordered-farm emitter\n");
                        } else {
                            opt_report(opt.verbose_level, OPT_NORMAL,
                                       "OPT (pipe): MERGE_WITH_EMITTER: Merged previous stage with farm emitter\n");
                        }
                        farm->add_emitter(node);
                        pipe.remove_stage(first_farm-1);
                    }
                }
            }
        }
    }
    // activate blocking mode if the n. of threads is greater than the threshold
    if (opt.blocking_mode) {
        ssize_t card = pipe.cardinality();
        if (opt.max_nb_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (pipe): BLOCKING_MODE: Activating blocking mode, threshold=%ld, number of threads=%ld\n",
                       opt.max_nb_threads, card);
            pipe.blocking_mode(true);
        }
    }
    // turning off initial/default mapping if the n. of threads is greater than the threshold
    if (opt.no_default_mapping) {
        ssize_t card = pipe.cardinality();
        if (opt.max_mapped_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (pipe): MAPPING: Disabling mapping, threshold=%ld, number of threads=%ld\n",
                       opt.max_mapped_threads, card);
            pipe.no_mapping();
        }
    }
    // no initial barrier
    if (opt.no_initial_barrier) {
        opt_report(opt.verbose_level, OPT_NORMAL,
                   "OPT (pipe): NO_INITIAL_BARRIER: Initial barrier disabled\n");
        pipe.no_barrier();
    }
    return 0;
}
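/*
 * Usage sketch (hypothetical Worker type; OptLevel2 is assumed to be the
 * predefined level that enables farm merging, collector removal and emitter
 * merging): applying the whole set of heuristics to a pipeline of farms.
 *
 *   struct Worker: ff_node_t<long> { long* svc(long* t) { return t; } };
 *   ff_farm farm1;  farm1.add_workers({new Worker, new Worker});
 *   ff_farm farm2;  farm2.add_workers({new Worker, new Worker});
 *   ff_pipeline pipe;
 *   pipe.add_stage(&farm1);
 *   pipe.add_stage(&farm2);
 *   if (optimize_static(pipe, OptLevel2()) < 0) error("optimize_static failed\n");
 *   pipe.run_and_wait_end();
 */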
} // namespace ff

#endif /* FF_OPTIMIZE_HPP */