mesytec-mnode/external/taskflow-3.8.0/3rd-party/ff/optimize.hpp
2025-01-04 01:25:05 +01:00

799 lines
34 KiB
C++

/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*!
* \link
* \file optimize.hpp
* \ingroup building_blocks
*
* \brief FastFlow optimization heuristics
*
* @details Static optimization heuristics applied to FastFlow pipeline and farm compositions
*
*/
#ifndef FF_OPTIMIZE_HPP
#define FF_OPTIMIZE_HPP
/* ***************************************************************************
*
* FastFlow is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 3 as
* published by the Free Software Foundation.
* Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
* or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
****************************************************************************
*/
/*
* Author: Massimo Torquati
*
*/
#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <ff/node.hpp>
#include <ff/pipeline.hpp>
#include <ff/farm.hpp>
#include <ff/all2all.hpp>
#include <ff/combine.hpp>
namespace ff {
typedef enum { OPT_NORMAL = 1, OPT_INFO = 2 } reportkind_t;

/**
 * Prints an optimization report message to stdout.
 *
 * @param verbose_level  current verbosity; the message is printed only if
 *                       it is >= the message kind
 * @param kind           message category (OPT_NORMAL or OPT_INFO)
 * @param str            printf-style format string followed by its arguments
 *
 * NOTE: the previous implementation copied the format string into a
 * malloc'ed buffer of strlen(str)+512 bytes (flagged "dangerous" by its
 * own comment) before passing it to vfprintf. The copy was never modified,
 * so the buffer was pure overhead plus an arbitrary-size assumption;
 * the format string is now forwarded directly.
 */
static inline void opt_report(int verbose_level, reportkind_t kind, const char *str, ...) {
    if (verbose_level < kind) return;  // message suppressed at this verbosity
    va_list argp;
    va_start(argp, str);
    vfprintf(stdout, str, argp);
    va_end(argp);
}
/**
 * Recursively strips default (null) collectors from farms nested inside the
 * worker set of the given farm. Pipeline workers, and pipelines inside
 * all-to-all workers, are handled by delegating to optimize_static with the
 * remove_collector flag enabled.
 *
 * @return 0 on success, -1 on error.
 */
static inline int remove_internal_collectors(ff_farm& farm) {
    // helper: run the collector-removal optimization on a nested pipeline
    auto strip_pipe = [](ff_node* n) -> int {
        ff_pipeline* p = reinterpret_cast<ff_pipeline*>(n);
        OptLevel lvl;
        lvl.remove_collector = true;
        return optimize_static(*p, lvl);
    };
    const svector<ff_node*>& workers = farm.getWorkers();
    for (size_t i = 0; i < workers.size(); ++i) {
        ff_node* w = workers[i];
        if (w->isFarm() && !w->isOFarm()) {
            // plain (non-ordered) nested farm: recurse, then drop its
            // collector if it is the default one
            ff_farm* nested = reinterpret_cast<ff_farm*>(w);
            if (remove_internal_collectors(*nested) < 0) return -1;
            if (nested->getCollector() == nullptr)
                nested->remove_collector();
            continue;
        }
        if (w->isPipe()) {
            if (strip_pipe(w) < 0) return -1;
        }
        if (w->isAll2All()) {
            // descend into the pipelines of both A2A sets
            ff_a2a* a2a = reinterpret_cast<ff_a2a*>(w);
            const svector<ff_node*>& lhs = a2a->getFirstSet();
            const svector<ff_node*>& rhs = a2a->getSecondSet();
            for (size_t j = 0; j < lhs.size(); ++j)
                if (lhs[j]->isPipe() && strip_pipe(lhs[j]) < 0) return -1;
            for (size_t j = 0; j < rhs.size(); ++j)
                if (rhs[j]->isPipe() && strip_pipe(rhs[j]) < 0) return -1;
        }
    }
    return 0;
}
/**
 * It combines the node passed as second parameter with the farm's emitter.
 * The node is added at the left-hand side of the emitter.
 * This transformation is logically equivalent to the following pipeline: ff_Pipe<> pipe(node, farm);
 */
static inline int combine_with_emitter(ff_farm& farm, ff_node*node, bool cleanup_node=false) {
    const bool parallel_bb = node->isFarm() || node->isPipe() || node->isAll2All();
    if (parallel_bb) {
        error("combine_with_emitter: the node to combine cannot be a parallel building block\n");
        return -1;
    }
    ff_node* em = farm.getEmitter();
    if (em == nullptr) {
        // no emitter yet: the node itself becomes the emitter
        farm.add_emitter(node);
        farm.cleanup_emitter(cleanup_node);
        return 0;
    }
    const bool em_cleanup = farm.isset_cleanup_emitter();
    ff_comb* combined = nullptr;
    if (em->isMultiOutput()) {
        combined = new ff_comb(node, em, cleanup_node, em_cleanup);
    } else {
        // wrap the plain emitter so the resulting combine is multi-output capable;
        // the wrapper takes over the farm's cleanup responsibility for it
        internal_mo_transformer* wrapped = new internal_mo_transformer(em, em_cleanup);
        combined = new ff_comb(node, wrapped, cleanup_node, true);
    }
    if (em_cleanup)
        farm.cleanup_emitter(false); // ownership has moved into the comb
    farm.change_emitter(combined, true);
    return 0;
}
/*
 * It combines the node passed as parameter with the farm's collector.
 * The node is added at the right-hand side of the collector.
 * This transformation is logically equivalent to the following pipeline: ff_Pipe<> pipe(farm, node);
 */
static inline int combine_with_collector(ff_farm& farm, ff_node*node, bool cleanup_node=false) {
    if (!farm.getCollector()) {
        error("combine_with_collector: the farm passed as parameter does not have a collector\n");
        return -1;
    }
    if (node->isFarm() || node->isPipe() || node->isAll2All()) {
        // BUG FIX: this message previously said "combine_with_emitter"
        error("combine_with_collector: the node to combine cannot be a parallel building block\n");
        return -1;
    }
    // fuse the current collector (left) with the new node (right);
    // ownership of the old collector follows the farm's cleanup flag
    ff_node* collector = farm.getCollector();
    ff_comb* comb = new ff_comb(collector, node,
                                farm.isset_cleanup_collector(), cleanup_node);
    if (farm.isset_cleanup_collector())
        farm.cleanup_collector(false); // ownership transferred to the comb
    farm.remove_collector();
    farm.add_collector(comb, true);
    return 0;
}
/*
 * It combines the node passed as parameter with the first stage of the pipeline.
 * The node is added at the left-hand side of the first pipeline node.
 * This transformation is logically equivalent to the following pipeline: ff_Pipe<> pipe2(node, pipe);
 */
template<typename T>
static inline int combine_with_firststage(ff_pipeline& pipe, T* node, bool cleanup_node) {
    pipe.flatten(); // guarantees the first stage is not itself a pipeline
    ff_node* head = pipe.get_node(0);
    if (head == nullptr) {
        error("combine_with_firststage: empty pipeline\n");
        return -1;
    }
    if (head->isAll2All())
        return combine_left_with_a2a(*reinterpret_cast<ff_a2a*>(head), node, cleanup_node);
    if (head->isFarm()) {
        ff_farm* farm = reinterpret_cast<ff_farm*>(head);
        if (combine_with_emitter(*farm, node, cleanup_node) < 0) return -1;
        // re-insert the (modified) farm so the pipeline registers the change
        pipe.remove_stage(0);
        pipe.insert_stage(0, head);
        return 0;
    }
    // sequential first stage: fuse node and head into a single combine stage
    ff_comb* fused = new ff_comb(node, head, cleanup_node);
    pipe.remove_stage(0);
    pipe.insert_stage(0, fused, true);
    return 0;
}
/*
 * Combines `node` with the right-hand side of a farm. If the farm has a
 * collector the node is fused with it; otherwise the node is fused with each
 * worker (recursing into pipeline/farm/a2a workers). Workers are assumed
 * homogeneous: worker 0 receives `node` itself, every other worker receives
 * a copy `new T(*node)` owned by the resulting combine.
 *
 * @return 0 on success, -1 on error.
 */
template<typename T>
static inline int combine_right_with_farm(ff_farm& farm, T* node, bool cleanup_node) {
    if (farm.hasCollector())
        return combine_with_collector(farm, node, cleanup_node);
    // farm with no collector
    const svector<ff_node*>& w= farm.getWorkers();
    assert(w.size()>0);
    if (w[0]->isPipe()) { // NOTE: we suppose that all workers are homogeneous
        if (combine_with_laststage(*reinterpret_cast<ff_pipeline*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            ff_pipeline* pipe = reinterpret_cast<ff_pipeline*>(w[i]);
            r+=combine_with_laststage(*pipe, new T(*node), true);
        }
        // BUG FIX: was (r>0?-1:0); the helpers return 0 or -1, so r can only
        // be <= 0 and failures on workers 1..n-1 were silently ignored
        return (r<0?-1:0);
    }
    if (w[0]->isFarm()) {
        if (combine_right_with_farm(*reinterpret_cast<ff_farm*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            ff_farm* farm = reinterpret_cast<ff_farm*>(w[i]);
            r+=combine_right_with_farm(*farm, new T(*node), true);
        }
        // BUG FIX: was (r>0?-1:0), see above
        return (r<0?-1:0);
    }
    if (w[0]->isAll2All()) {
        if (combine_right_with_a2a(*reinterpret_cast<ff_a2a*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            ff_a2a* a2a = reinterpret_cast<ff_a2a*>(w[i]);
            r+=combine_right_with_a2a(*a2a, new T(*node), true);
        }
        // BUG FIX: was (r>0?-1:0), see above
        return (r<0?-1:0);
    }
    // plain sequential workers: replace each worker with comb(worker, node-copy)
    bool workers_cleanup = farm.isset_cleanup_workers();
    std::vector<ff_node*> new_workers;
    ff_comb* comb = new ff_comb(w[0], node, workers_cleanup, cleanup_node);
    assert(comb);
    new_workers.push_back(comb);
    for(size_t i=1;i<w.size();++i) {
        ff_comb* c = new ff_comb(w[i], new T(*node), workers_cleanup, true);
        assert(c);
        new_workers.push_back(c);
    }
    farm.change_workers(new_workers);
    return 0;
}
/*
 * Combines `node` with the right-hand side of an all-to-all: each node of the
 * second set is fused with `node` (worker 0) or with a copy of it (the others).
 * The second set is assumed homogeneous.
 *
 * @return 0 on success, -1 on error.
 */
template<typename T>
static inline int combine_right_with_a2a(ff_a2a& a2a, T* node, bool cleanup_node) {
    const svector<ff_node*>& w= a2a.getSecondSet();
    if (w[0]->isPipe()) { // NOTE: we suppose that all workers are homogeneous
        if (combine_with_laststage(*reinterpret_cast<ff_pipeline*>(w[0]), node, cleanup_node)<0) return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            assert(w[i]->isPipe());
            ff_pipeline* pipe = reinterpret_cast<ff_pipeline*>(w[i]);
            r+=combine_with_laststage(*pipe, new T(*node), true);
        }
        // BUG FIX: was (r>0?-1:0); combine_with_laststage returns 0 or -1, so
        // r can only be <= 0 and failures were silently ignored
        return (r<0?-1:0);
    }
    // plain nodes: replace the second set with comb(worker, node-copy) nodes
    std::vector<ff_node*> new_secondset;
    ff_comb* comb = new ff_comb(w[0], node, false, cleanup_node);
    assert(comb);
    new_secondset.push_back(comb);
    for(size_t i=1;i<w.size();++i) {
        ff_comb* c = new ff_comb(w[i], new T(*node), false, true);
        assert(c);
        new_secondset.push_back(c);
    }
    a2a.change_secondset(new_secondset, true);
    return 0;
}
/*
 * Combines `node` with the left-hand side of an all-to-all: each node of the
 * first set is prefixed with `node` (worker 0) or with a copy of it (the
 * others). The first set is assumed homogeneous.
 *
 * @return 0 on success, -1 on error.
 */
template<typename T>
static inline int combine_left_with_a2a(ff_a2a& a2a, T* node, bool cleanup_node) {
    const svector<ff_node*>& w= a2a.getFirstSet();
    if (w[0]->isPipe()) { // NOTE: we suppose that all workers are homogeneous
        if (combine_with_firststage(*reinterpret_cast<ff_pipeline*>(w[0]), node, cleanup_node)<0)
            return -1;
        int r=0;
        for(size_t i=1;i<w.size();++i) {
            assert(w[i]->isPipe());
            ff_pipeline* pipe = reinterpret_cast<ff_pipeline*>(w[i]);
            r+=combine_with_firststage(*pipe, new T(*node), true);
        }
        // BUG FIX: was (r>0?-1:0); combine_with_firststage returns 0 or -1, so
        // r can only be <= 0 and failures were silently ignored
        return (r<0?-1:0);
    }
    // plain nodes: replace the first set with comb(node-copy, worker) nodes
    std::vector<ff_node*> new_firstset;
    ff_comb* comb = new ff_comb(node, w[0], cleanup_node, false);
    assert(comb);
    new_firstset.push_back(comb);
    for(size_t i=1;i<w.size();++i) {
        ff_comb* c = new ff_comb(new T(*node), w[i], true, false);
        assert(c);
        new_firstset.push_back(c);
    }
    a2a.change_firstset(new_firstset, a2a.ondemand_buffer(), true);
    return 0;
}
/*
 * It combines the node passed as second parameter with the last stage of the pipeline.
 * The node is added at the right-hand side of the last pipeline stage.
 * This transformation is logically equivalent to the following pipeline: ff_Pipe<> pipe2(pipe, node);
 */
template<typename T>
static inline int combine_with_laststage(ff_pipeline& pipe, T* node, bool cleanup_node) {
    pipe.flatten(); // guarantees the last stage is not itself a pipeline
    ff_node* tail = pipe.get_lastnode();
    if (tail == nullptr) {
        error("combine_with_laststage: empty pipeline\n");
        return -1;
    }
    if (tail->isAll2All())
        return combine_right_with_a2a(*reinterpret_cast<ff_a2a*>(tail), node, cleanup_node);
    if (tail->isFarm())
        return combine_right_with_farm(*reinterpret_cast<ff_farm*>(tail), node, cleanup_node);
    // plain sequential last stage: fuse it with node into one combine stage
    const bool pipe_cleanup = pipe.isset_cleanup_nodes();
    const int nstages = static_cast<int>(pipe.nodes_list.size());
    ff_comb* fused = new ff_comb(tail, node, pipe_cleanup, cleanup_node);
    pipe.remove_stage(nstages-1);
    const int pos = (nstages-1 > 0) ? (nstages-1) : 0;
    pipe.insert_stage(pos, fused, true);
    return 0;
}
/* This is farm specific.
 * - It basically sets the threshold for enabling blocking mode.
 * - It can remove the collector of internal farms in a farm of farms composition.
 * - TODO: Farm of farms ---> single farm with two emitters combined (external+internal)
 *
 * Must be called before the farm has been prepared (i.e. before running it).
 * Returns 0 on success, -1 on error.
 */
static inline int optimize_static(ff_farm& farm, const OptLevel& opt=OptLevel1()) {
    if (farm.prepared) {
        error("optimize_static (farm) called after prepare\n");
        return -1;
    }
    // optimizing internal pipelines, if any
    // NOTE: thread-count-based options (blocking mode, initial barrier, mapping)
    // are applied only at this level, so they are disabled in the nested pass
    OptLevel iopt(opt);
    iopt.blocking_mode = false;
    iopt.no_initial_barrier = false;
    iopt.no_default_mapping = false;
    const svector<ff_node*> &Workers = farm.getWorkers();
    for(size_t i=0;i<Workers.size();++i) {
        if (Workers[i]->isPipe()) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (farm): Looking for optimizations in the internal pipeline %ld\n",i);
            ff_pipeline *ipipe = reinterpret_cast<ff_pipeline*>(Workers[i]);
            if (optimize_static(*ipipe, iopt)) return -1;
        }
    }
    // here it looks for internal farms with null collectors
    if (opt.remove_collector) {
        // recursively optimizes a nested farm, then drops its collector if it
        // is still the default (null) one
        auto optimize_internal_farm = [opt](ff_farm& ifarm) {
            OptLevel iopt;
            iopt.remove_collector=true;
            iopt.verbose_level = opt.verbose_level;
            if (optimize_static(ifarm, iopt)<0) return -1;
            if (ifarm.getCollector() == nullptr) {
                opt_report(opt.verbose_level, OPT_NORMAL, "OPT (farm): REMOVE_COLLECTOR: Removed farm collector\n");
                ifarm.remove_collector();
            }
            return 0;
        };
        const svector<ff_node*>& W = farm.getWorkers();
        for(size_t i=0;i<W.size();++i) {
            if (W[i]->isFarm() && !W[i]->isOFarm()) { // ordered farms keep their collector
                ff_farm* ifarm = reinterpret_cast<ff_farm*>(W[i]);
                opt_report(opt.verbose_level, OPT_NORMAL,
                           "OPT (farm): Looking for optimizations in the internal farm %ld\n",i);
                if (optimize_internal_farm(*ifarm)<0) return -1;
            } else {
                if (W[i]->isPipe()) {
                    ff_pipeline* ipipe = reinterpret_cast<ff_pipeline*>(W[i]);
                    OptLevel iopt;
                    iopt.remove_collector=true;
                    iopt.verbose_level = opt.verbose_level;
                    if (optimize_static(*ipipe, iopt)<0) return -1;
#if 0
                    ff_node* last = ipipe->get_lastnode();
                    if (last->isFarm() && !last->isOFarm()) {
                        ff_farm* ifarm = reinterpret_cast<ff_farm*>(last);
                        opt_report(opt.verbose_level, OPT_NORMAL,
                                   "OPT (farm): Looking for optimizations in the internal last farm of a pipeline %ld\n",i);
                        if (optimize_internal_farm(*ifarm)<0) return -1;
                    }
#endif
                }
                if (W[i]->isAll2All()) {
                    // descend into the pipelines of both sets of the all-to-all
                    ff_a2a *a2a = reinterpret_cast<ff_a2a*>(W[i]);
                    const svector<ff_node*>& W1 = a2a->getFirstSet();
                    const svector<ff_node*>& W2 = a2a->getSecondSet();
                    for(size_t j=0;j<W1.size();++j) {
                        if (W1[j]->isPipe()) {
                            ff_pipeline* ipipe=reinterpret_cast<ff_pipeline*>(W1[j]);
                            OptLevel iopt;
                            iopt.remove_collector=true;
                            iopt.verbose_level = opt.verbose_level;
                            if (optimize_static(*ipipe, iopt)<0) return -1;
                        }
                    }
                    for(size_t j=0;j<W2.size();++j) {
                        if (W2[j]->isPipe()) {
                            ff_pipeline* ipipe=reinterpret_cast<ff_pipeline*>(W2[j]);
                            OptLevel iopt;
                            iopt.remove_collector=true;
                            iopt.verbose_level = opt.verbose_level;
                            if (optimize_static(*ipipe, iopt)<0) return -1;
                        }
                    }
                }
            }
        }
    }
    // switching to blocking mode if the n. of threads is greater than the threshold
    if (opt.blocking_mode) {
        ssize_t card = farm.cardinality();
        if (opt.max_nb_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (farm): BLOCKING_MODE: Activating blocking mode, threshold=%ld, number of threads=%ld\n",opt.max_nb_threads, card);
            farm.blocking_mode(true);
        }
    }
    // turning off initial/default mapping if the n. of threads is greater than the threshold
    if (opt.no_default_mapping) {
        ssize_t card = farm.cardinality();
        if (opt.max_mapped_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (farm): MAPPING: Disabling mapping, threshold=%ld, number of threads=%ld\n",opt.max_mapped_threads, card);
            farm.no_mapping();
        }
    }
    // no initial barrier
    if (opt.no_initial_barrier) {
        opt_report(opt.verbose_level, OPT_NORMAL,
                   "OPT (farm): NO_INITIAL_BARRIER: Initial barrier disabled\n");
        farm.no_barrier();
    }
    return 0;
}
/*
 * Pipeline-specific static optimizer. Depending on the flags set in `opt` it:
 *  - recursively optimizes pipelines nested inside farm/a2a stages;
 *  - merges sequences of compatible farms into their normal form (merge_farms);
 *  - replaces two consecutive plain farms with a single farm containing an
 *    all-to-all (introduce_a2a);
 *  - removes farm collectors, transforming the following stage into a
 *    multi-input node when needed (remove_collector);
 *  - fuses a sequential previous stage into a farm's emitter (merge_with_emitter);
 *  - enables blocking mode / disables default mapping / skips the initial
 *    barrier based on the thread-count thresholds in `opt`.
 * Must be called before the pipeline has been prepared.
 * Returns 0 on success, -1 on error.
 */
static inline int optimize_static(ff_pipeline& pipe, const OptLevel& opt=OptLevel1()) {
    if (pipe.prepared) {
        error("optimize_static (pipeline) called after prepare\n");
        return -1;
    }
    // flattening the pipeline
    pipe.flatten();
    int nstages=static_cast<int>(pipe.nodes_list.size());
    // looking for farm and all-to-all because they might have pipeline inside
    // for each nested pipeline the optimize_pipeline function is recursively
    // called following a depth-first search
    // NOTE: thread-count-based options are applied only at this (outermost)
    // level, so they are disabled in the nested pass
    OptLevel iopt(opt);
    iopt.blocking_mode = false;
    iopt.no_initial_barrier = false;
    iopt.no_default_mapping = false;
    for(int i=0;i<nstages;++i) {
        if (pipe.nodes_list[i]->isFarm()) {
            ff_farm *farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[i]);
            const svector<ff_node*>& W = farm->getWorkers();
            for(size_t j=0;j<W.size();++j) {
                if (W[j]->isPipe()) {
                    ff_pipeline* ipipe=reinterpret_cast<ff_pipeline*>(W[j]);
                    if (optimize_static(*ipipe, iopt)) return -1;
                }
            }
        } else if (pipe.nodes_list[i]->isAll2All()) {
            ff_a2a *a2a = reinterpret_cast<ff_a2a*>(pipe.nodes_list[i]);
            const svector<ff_node*>& W1 = a2a->getFirstSet();
            const svector<ff_node*>& W2 = a2a->getSecondSet();
            for(size_t j=0;j<W1.size();++j) {
                if (W1[j]->isPipe()) {
                    ff_pipeline* ipipe=reinterpret_cast<ff_pipeline*>(W1[j]);
                    if (optimize_static(*ipipe, iopt)) return -1;
                }
            }
            for(size_t j=0;j<W2.size();++j) {
                if (W2[j]->isPipe()) {
                    ff_pipeline* ipipe=reinterpret_cast<ff_pipeline*>(W2[j]);
                    if (optimize_static(*ipipe, iopt)) return -1;
                }
            }
        }
    }
    // ------------------ helping function ----------------------
    // returns the index (>= start) of the first farm stage whose collector is
    // the default (null) one, or -1 if there is none
    auto find_farm_with_null_collector =
        [](const svector<ff_node*>& nodeslist, int start=0)->int {
        for(int i=start;i<(int)(nodeslist.size());++i) {
            if (nodeslist[i]->isFarm()) {
                ff_farm *farm = reinterpret_cast<ff_farm*>(nodeslist[i]);
                if (farm->getCollector() == nullptr) return i;
            }
        }
        return -1;
    };
    // returns the index (>= start) of the first farm stage without an emitter,
    // or -1 if there is none
    auto find_farm_with_null_emitter =
        [](const svector<ff_node*>& nodeslist, int start=0)->int {
        for(int i=start;i<(int)(nodeslist.size());++i) {
            if (nodeslist[i]->isFarm() &&
                (nullptr == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getEmitter())
                )
                return i;
        }
        return -1;
    };
    // looking for the longest sequence of farms (or ofarms) with the same number of workers
    // On success first_farm/last_farm delimit a run of farms of the same kind
    // (all ordered or all plain), all with the same worker count, where the
    // inner ones have neither emitter nor collector.
    auto farm_sequence =
        [&](const svector<ff_node*>& nodeslist, int& first_farm, int& last_farm) {
        bool ofarm = nodeslist[first_farm]->isOFarm();
        size_t nworkers = (reinterpret_cast<ff_farm*>(nodeslist[first_farm]))->getNWorkers();
        int starting_point=first_farm+1;
        int first = first_farm, last = last_farm;
        while (starting_point<static_cast<int>(nodeslist.size())) {
            bool ok=true;
            int next = find_farm_with_null_emitter(nodeslist, starting_point);
            if (next == -1) break;
            else {
                // every stage between starting_point and next must be a
                // compatible farm with default emitter and collector
                for(int i=starting_point; i<=next;) {
                    if ((ofarm?nodeslist[i]->isOFarm():!nodeslist[i]->isOFarm()) &&
                        (nworkers == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getNWorkers()) &&
                        (nullptr == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getCollector()) &&
                        (nullptr == (reinterpret_cast<ff_farm*>(nodeslist[i]))->getEmitter())
                        )
                        ++i;
                    else { ok = false; break; }
                }
                if (ok)
                    if ((reinterpret_cast<ff_farm*>(nodeslist[next]))->getEmitter() != nullptr) ok=false;
            }
            if (ok) {
                last = next;          // extend the mergeable run
                starting_point = next+1;
            } else {
                if (last==-1) {
                    // no run found yet: retry from the next candidate farm
                    first = find_farm_with_null_collector(nodeslist, first+1);
                    if (first==-1) break;
                    starting_point = first_farm+1;
                    // NOTE(review): the break below is unconditional, so the
                    // restart assignment above is never used — confirm intent
                } break;
            }
        }
        if (first != -1 && last != -1) {
            first_farm = first;
            last_farm = last;
        }
    };
    // introduces the normal-form of a sequence of farms (or ofarms)
    // builds one farm whose worker j is the combine of worker j of every farm
    // in [first_farm, last_farm]; emitter comes from the first farm and
    // collector (if any) from the last one
    auto combine_farm_sequence =
        [](const svector<ff_node*>& nodeslist, int first_farm, int last_farm) {
        svector<svector<ff_node*> > W(16);
        W.resize(last_farm-first_farm+1);
        for(int i=first_farm, j=0; i<=last_farm; ++i,++j) {
            W[j]=reinterpret_cast<ff_farm*>(nodeslist[i])->getWorkers();
        }
        size_t nfarms = W.size();
        size_t nworkers = W[0].size();
        std::vector<ff_node*> Workers(nworkers);
        for(size_t j=0; j<nworkers; ++j) {
            if (nfarms==2) {
                ff_comb *p = new ff_comb(W[0][j],W[1][j]);
                assert(p);
                Workers[j] = p;
            } else {
                // fold the j-th workers of all farms into a single comb,
                // releasing each intermediate comb as it is absorbed
                const ff_comb *p = new ff_comb(W[0][j],W[1][j]);
                for(size_t i=2;i<nfarms;++i) {
                    const ff_comb* combtmp = new ff_comb(*p, W[i][j]);
                    assert(combtmp);
                    delete p;
                    p = combtmp;
                }
                Workers[j] = const_cast<ff_comb*>(p);
            }
        }
        ff_farm* firstfarm= reinterpret_cast<ff_farm*>(nodeslist[first_farm]);
        ff_farm* lastfarm = reinterpret_cast<ff_farm*>(nodeslist[last_farm]);
        ff_farm* newfarm = new ff_farm;
        if (firstfarm->isOFarm()) {
            assert(lastfarm->isOFarm());
            newfarm->set_ordered();
        }
        if (firstfarm->getEmitter()) newfarm->add_emitter(firstfarm->getEmitter());
        if (lastfarm->hasCollector())
            newfarm->add_collector(lastfarm->getCollector());
        newfarm->add_workers(Workers);
        newfarm->cleanup_workers();
        newfarm->set_scheduling_ondemand(firstfarm->ondemand_buffer());
        return newfarm;
    };
    // ---------------- end helping function --------------------
    if (opt.merge_farms) {
        // find the first farm with default collector or with no collector
        int first_farm = find_farm_with_null_collector(pipe.nodes_list);
        if (first_farm!=-1) {
            do {
                int last_farm = -1;
                farm_sequence(pipe.nodes_list,first_farm,last_farm);
                if (first_farm<last_farm) { // normal form
                    // replace the whole run of farms with the single merged farm
                    ff_farm *newfarm = combine_farm_sequence(pipe.nodes_list,first_farm,last_farm);
                    for(int i=first_farm; i<=last_farm; ++i) pipe.remove_stage(first_farm);
                    pipe.insert_stage(first_farm, newfarm, true);
                    opt_report(opt.verbose_level, OPT_NORMAL,
                               "OPT (pipe): MERGE_FARMS: Merged farms staged [%d-%d]\n", first_farm, last_farm);
                }
                first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+1);
            }while(first_farm!=-1 && first_farm < static_cast<int>(pipe.nodes_list.size()));
        }
    }
    if (opt.introduce_a2a) {
        int first_farm = find_farm_with_null_collector(pipe.nodes_list);
        while(first_farm != -1 && (first_farm < static_cast<int>(pipe.nodes_list.size()-1))) {
            // two consecutive plain farms (no ordered farms, second with no
            // emitter) are replaced by a single farm containing an all-to-all
            if (!pipe.nodes_list[first_farm]->isOFarm()) {
                if (pipe.nodes_list[first_farm+1]->isFarm() && !pipe.nodes_list[first_farm+1]->isOFarm()) {
                    ff_farm *farm1 = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                    ff_farm *farm2 = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm+1]);
                    if (farm2->getEmitter() == nullptr) {
                        opt_report(opt.verbose_level, OPT_NORMAL,
                                   "OPT (pipe): INTRODUCE_A2A: Introducing all-to-all between %d and %d stages\n", first_farm, first_farm+1);
                        const ff_farm f = combine_farms_a2a(*farm1, *farm2);
                        ff_farm* newfarm = new ff_farm(f);
                        assert(newfarm);
                        pipe.remove_stage(first_farm);
                        pipe.remove_stage(first_farm);
                        pipe.insert_stage(first_farm, newfarm, true);
                        first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+1);
                    }
                } else {
                    if (pipe.nodes_list[first_farm+1]->isOFarm())
                        opt_report(opt.verbose_level, OPT_INFO,
                                   "OPT (pipe): INTRODUCE_A2A: cannot introduce A2A because node %d is an ordered farm\n", first_farm+1);
                    first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+2);
                }
            } else {
                opt_report(opt.verbose_level, OPT_INFO,
                           "OPT (pipe): INTRODUCE_A2A: cannot introduce A2A because node %d is an ordered farm\n", first_farm);
                first_farm = find_farm_with_null_collector(pipe.nodes_list, first_farm+1);
            }
        }
    }
    if (opt.remove_collector) {
        // first, for all farms in the pipeline we try to optimize farms' workers
        OptLevel farmopt;
        farmopt.remove_collector = true;
        farmopt.verbose_level = opt.verbose_level;
        for(size_t i=0;i<pipe.nodes_list.size();++i) {
            if (pipe.nodes_list[i]->isFarm()) {
                ff_farm *farmnode = reinterpret_cast<ff_farm*>(pipe.nodes_list[i]);
                if (optimize_static(*farmnode, farmopt)<0) {
                    error("optimize_static, trying to optimize the farm at stage %ld\n", i);
                    return -1;
                }
            }
        }
        int first_farm, next=0;
        while((first_farm=find_farm_with_null_collector(pipe.nodes_list, next)) != -1) {
            if (first_farm < static_cast<int>(pipe.nodes_list.size()-1)) {
                // TODO: if the next stage is A2A would be nice to have a rule
                // that attaches the farm workers with the first set of nodes
                ff_farm *farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                if (farm->hasCollector()) {
                    if (farm->isOFarm()) {
                        // ordered farm: cannot drop the collector, but a plain
                        // next stage can be merged into it
                        if ((!pipe.nodes_list[first_farm+1]->isAll2All()) &&
                            (!pipe.nodes_list[first_farm+1]->isFarm())) {
                            farm->add_collector(pipe.nodes_list[first_farm+1]);
                            pipe.remove_stage(first_farm+1);
                            opt_report(opt.verbose_level, OPT_NORMAL, "OPT (pipe): REMOVE_COLLECTOR: Merged next stage with ordered-farm collector\n");
                        }
                    } else {
                        if (!pipe.nodes_list[first_farm+1]->isAll2All()) {
                            farm->remove_collector();
                            opt_report(opt.verbose_level, OPT_NORMAL, "OPT (pipe): REMOVE_COLLECTOR: Removed farm collector\n");
                            if (!pipe.nodes_list[first_farm+1]->isMultiInput()) {
                                // the next stage is a standard node: wrap it so
                                // it can receive directly from all workers
                                ff_node *next = pipe.nodes_list[first_farm+1];
                                pipe.remove_stage(first_farm+1);
                                ff_minode *mi = new internal_mi_transformer(next);
                                assert(mi);
                                pipe.insert_stage(first_farm+1, mi, true);
                                opt_report(opt.verbose_level, OPT_NORMAL, "OPT (pipe): REMOVE_COLLECTOR: Transformed next stage to multi-input node\n");
                            }
                        }
                    }
                }
            } else { // this is the last stage (or the only stage)
                ff_farm *farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                if (!farm->isOFarm()) {
                    if (farm->hasCollector()) {
                        farm->remove_collector();
                        opt_report(opt.verbose_level, OPT_NORMAL, "OPT (pipe): REMOVE_COLLECTOR: Removed farm collector\n");
                    }
                }
            }
            next=first_farm+1;
        }
    }
    if (opt.merge_with_emitter) {
        int first_farm = find_farm_with_null_emitter(pipe.nodes_list);
        if (first_farm!=-1) {
            if (first_farm>0) { // it is not the first one
                // the previous stage can be merged only if it is a plain
                // (non-multi-output) node or a farm with a suitable collector
                bool prev_single_standard = (!pipe.nodes_list[first_farm-1]->isMultiOutput());
                if (prev_single_standard) {
                    // could be a farm with a collector
                    if (pipe.nodes_list[first_farm-1]->isFarm()) {
                        ff_farm *farm_prev = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm-1]);
                        if (farm_prev->hasCollector()) {
                            ff_node* collector=farm_prev->getCollector();
                            if (collector->isMultiInput() && !collector->isComp()) {
                                error("MERGING MULTI-INPUT COLLECTOR TO THE FARM EMITTER NOT YET SUPPORTED\n");
                                abort();
                                // TODO we have to create a multi-input comp to add to the emitter
                            }
                            opt_report(opt.verbose_level, OPT_NORMAL, "OPT (pipe): MERGE_WITH_EMITTER: Merged previous stage with farm emitter\n");
                            ff_farm *farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                            farm->add_emitter(collector);
                            farm_prev->remove_collector();
                        }
                    } else {
                        ff_node *node = pipe.nodes_list[first_farm-1];
                        if (node->isMultiInput() && !node->isComp()) {
                            error("MERGING MULTI-INPUT NODE TO THE FARM EMITTER NOT YET SUPPORTED\n");
                            //TODO: we have to create a multi-input comp to add to the emitter
                            abort();
                        }
                        ff_farm *farm = reinterpret_cast<ff_farm*>(pipe.nodes_list[first_farm]);
                        if (pipe.nodes_list[first_farm]->isOFarm()) {
                            opt_report(opt.verbose_level, OPT_NORMAL, "OPT (pipe): MERGE_WITH_EMITTER: Merged previous stage with ordered-farm emitter\n");
                        } else {
                            opt_report(opt.verbose_level, OPT_NORMAL, "OPT (pipe): MERGE_WITH_EMITTER: Merged previous stage with farm emitter\n");
                        }
                        farm->add_emitter(node);
                        pipe.remove_stage(first_farm-1);
                    }
                }
            }
        }
    }
    // activate blocking mode if the n. of threads is greater than the threshold
    if (opt.blocking_mode) {
        ssize_t card = pipe.cardinality();
        if (opt.max_nb_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (pipe): BLOCKING_MODE: Activating blocking mode, threshold=%ld, number of threads=%ld\n",opt.max_nb_threads, card);
            pipe.blocking_mode(true);
        }
    }
    // turning off initial/default mapping if the n. of threads is greater than the threshold
    if (opt.no_default_mapping) {
        ssize_t card = pipe.cardinality();
        if (opt.max_mapped_threads < card) {
            opt_report(opt.verbose_level, OPT_NORMAL,
                       "OPT (pipe): MAPPING: Disabling mapping, threshold=%ld, number of threads=%ld\n",opt.max_mapped_threads, card);
            pipe.no_mapping();
        }
    }
    // no initial barrier
    if (opt.no_initial_barrier) {
        opt_report(opt.verbose_level, OPT_NORMAL,
                   "OPT (pipe): NO_INITIAL_BARRIER: Initial barrier disabled\n");
        pipe.no_barrier();
    }
    return 0;
}
} // namespace ff
#endif /* FF_OPTIMIZE_HPP */