1598 lines
58 KiB
C++
1598 lines
58 KiB
C++
/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
|
|
/*!
|
|
* \link
|
|
* \file combine.hpp
|
|
* \ingroup building_blocks
|
|
*
|
|
* \brief FastFlow composition building block
|
|
*
|
|
* @detail FastFlow basic container for a shared-memory parallel activity
|
|
*
|
|
*/
|
|
|
|
#ifndef FF_COMBINE_HPP
|
|
#define FF_COMBINE_HPP
|
|
|
|
/* ***************************************************************************
|
|
*
|
|
* FastFlow is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU Lesser General Public License version 3 as
|
|
* published by the Free Software Foundation.
|
|
* Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
|
|
* or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
* License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
*
|
|
****************************************************************************
|
|
*/
|
|
|
|
/*
|
|
* Author: Massimo Torquati
|
|
*
|
|
*/
|
|
// This file contains the ff_comb combiner building block class
|
|
// the ff_comb_t class, which is the type-preserving version of ff_comb,
|
|
// and some helper functions, e.g., combine_nodes, combine_farms, etc.
|
|
|
|
|
|
#include <ff/node.hpp>
|
|
#include <ff/multinode.hpp>
|
|
#include <ff/pipeline.hpp>
|
|
#include <ff/ordering_policies.hpp>
|
|
#include <ff/farm.hpp>
|
|
|
|
namespace ff {
|
|
|
|
|
|
// forward declaration
|
|
class ff_comb;
|
|
static const ff_pipeline combine_ofarm_farm(ff_farm& farm1, ff_farm& farm2);
|
|
template<typename T1, typename T2>
|
|
static const ff_comb combine_nodes(T1& n1, T2& n2);
|
|
template<typename T1, typename T2>
|
|
static std::unique_ptr<ff_node> unique_combine_nodes(T1& n1, T2& n2);
|
|
|
|
|
|
class ff_comb: public ff_minode {
|
|
//
|
|
// NOTE: the ff_comb appears either as a standard ff_node or as ff_minode depending on
|
|
// whether the first node is a standard node or a multi-input node.
|
|
//
|
|
|
|
template<typename T1, typename T2>
|
|
friend const ff_comb combine_nodes(T1& n1, T2& n2);
|
|
|
|
template<typename T1, typename T2>
|
|
friend std::unique_ptr<ff_node> unique_combine_nodes(T1& n1, T2& n2);
|
|
|
|
friend class ff_loadbalancer;
|
|
friend class ff_gatherer;
|
|
friend class ff_farm;
|
|
friend class ff_a2a;
|
|
|
|
// used if the last stage has no output channel
|
|
static bool devnull(void*,int,unsigned long, unsigned long, void*) {return true;}
|
|
|
|
private:
|
|
void registerAllGatherCallback(int (*cb)(void *,void **, void*), void * arg) {
|
|
assert(isMultiInput());
|
|
// NOTE: the gt of the first node will be replaced by the ff_comb gt.
|
|
ff_minode::getgt()->registerAllGatherCallback(cb,arg);
|
|
}
|
|
|
|
public:
|
|
template<typename T1, typename T2>
|
|
ff_comb(T1& n1, T2& n2) {
|
|
add_node(n1,n2);
|
|
}
|
|
ff_comb(ff_node* n1, ff_node* n2, bool first_cleanup=false, bool second_cleanup=false){
|
|
if (!n1 || !n2) {
|
|
error("COMBINE, passing null pointer to constructor\n");
|
|
return;
|
|
}
|
|
add_node(n1,n2);
|
|
if (first_cleanup) {
|
|
cleanup_stages.push_back(n1);
|
|
}
|
|
if (second_cleanup) {
|
|
cleanup_stages.push_back(n2);
|
|
}
|
|
}
|
|
|
|
ff_comb(const ff_comb& c) : ff_minode(c) {
|
|
for (auto s: c.comp_nodes) {
|
|
if (s->isComp()) {
|
|
comp_nodes.push_back(new ff_comb(*(ff_comb*)s));
|
|
assert(comp_nodes.back());
|
|
cleanup_stages.push_back(comp_nodes.back());
|
|
} else {
|
|
comp_nodes.push_back(s);
|
|
}
|
|
}
|
|
// this is a dirty part, we modify a const object.....
|
|
ff_comb *dirty= const_cast<ff_comb*>(&c);
|
|
for (size_t i=0;i<dirty->cleanup_stages.size();++i) {
|
|
cleanup_stages.push_back(dirty->cleanup_stages[i]);
|
|
dirty->cleanup_stages[i]=nullptr;
|
|
}
|
|
}
|
|
|
|
virtual ~ff_comb() {
|
|
for (auto s: cleanup_stages) {
|
|
if (s) delete s;
|
|
}
|
|
}
|
|
|
|
int run(bool skip_init=false) {
|
|
if (!skip_init) {
|
|
if (getFirst()->get_in_buffer() == nullptr)
|
|
getFirst()->skipfirstpop(true);
|
|
}
|
|
|
|
if (!prepared) if (prepare()<0) return -1;
|
|
|
|
// set blocking mode for the last node of the composition
|
|
getLast()->blocking_mode(blocking_in);
|
|
if (comp_nodes[0]->isMultiInput()) {
|
|
svector<ff_node*> w(1);
|
|
getFirst()->get_in_nodes(w);
|
|
if (w.size() == 0) getFirst()->skipfirstpop(true);
|
|
return ff_minode::run();
|
|
}
|
|
|
|
if (ff_node::run(true)<0) return -1;
|
|
return 0;
|
|
}
|
|
|
|
int wait() {
|
|
if (comp_nodes[0]->isMultiInput())
|
|
return ff_minode::wait();
|
|
if (ff_node::wait()<0) return -1;
|
|
return 0;
|
|
}
|
|
|
|
int run_and_wait_end() {
|
|
if (isfrozen()) { // TODO
|
|
error("COMB: Error: FEATURE NOT YET SUPPORTED\n");
|
|
return -1;
|
|
}
|
|
stop();
|
|
if (run()<0) return -1;
|
|
if (wait()<0) return -1;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief checks if the node is running
|
|
*
|
|
*/
|
|
bool done() const {
|
|
if (comp_nodes[0]->isMultiInput())
|
|
return ff_minode::done();
|
|
return ff_node::done();
|
|
}
|
|
|
|
// NOTE: it is multi-input only if the first node is multi-input
|
|
bool isMultiInput() const {
|
|
if (getFirst()->isMultiInput()) return true;
|
|
return false;
|
|
}
|
|
// NOTE: it is multi-output only if the last node is multi-output
|
|
bool isMultiOutput() const {
|
|
if (getLast()->isMultiOutput()) return true;
|
|
return false;
|
|
}
|
|
inline bool isComp() const { return true; }
|
|
|
|
// returns the first sequential node (not comb) on the left-hand side
|
|
ff_node* getFirst() const {
|
|
if (comp_nodes[0]->isComp())
|
|
return ((ff_comb*)comp_nodes[0])->getFirst();
|
|
return comp_nodes[0];
|
|
}
|
|
// returns the last sequential node (not comb) on the right-hand side
|
|
ff_node* getLast() const {
|
|
if (comp_nodes[1]->isComp())
|
|
return ((ff_comb*)comp_nodes[1])->getLast();
|
|
return comp_nodes[1];
|
|
}
|
|
ff_node* getLeft() const {
|
|
return comp_nodes[0];
|
|
}
|
|
ff_node* getRight() const {
|
|
return comp_nodes[1];
|
|
}
|
|
|
|
// returns the pointer to the "replaced" node
|
|
ff_node* replace_first(ff_node* n, bool cleanup=false, bool remove_from_cleanuplist=true) {
|
|
if (comp_nodes[0]->isComp()) return nullptr;
|
|
ff_node* first = comp_nodes[0];
|
|
comp_nodes[0] = n;
|
|
|
|
if (remove_from_cleanuplist) {
|
|
ssize_t pos=-1;
|
|
for(size_t i=0;i<cleanup_stages.size();++i)
|
|
if (cleanup_stages[i] == first) { pos=i; break;}
|
|
if (pos>=0)
|
|
cleanup_stages.erase(cleanup_stages.begin()+pos);
|
|
}
|
|
if (cleanup)
|
|
cleanup_stages.push_back(n);
|
|
return first;
|
|
}
|
|
|
|
// returns the pointer to the "replaced" node
|
|
ff_node* replace_last(ff_node* n, bool cleanup=false, bool remove_from_cleanuplist=true) {
|
|
if (comp_nodes[1]->isComp()) return nullptr;
|
|
ff_node* last = comp_nodes[1];
|
|
comp_nodes[1] = n;
|
|
|
|
if (remove_from_cleanuplist) {
|
|
ssize_t pos=-1;
|
|
for(size_t i=0;i<cleanup_stages.size();++i)
|
|
if (cleanup_stages[i] == last) { pos=i; break;}
|
|
if (pos>=0)
|
|
cleanup_stages.erase(cleanup_stages.begin()+pos);
|
|
}
|
|
if (cleanup)
|
|
cleanup_stages.push_back(n);
|
|
return last;
|
|
}
|
|
|
|
bool change_node(ff_node* old, ff_node* n, bool cleanup=false, bool remove_from_cleanuplist=false) {
|
|
if (comp_nodes[0] == old)
|
|
return (replace_first(n, cleanup, remove_from_cleanuplist) != nullptr);
|
|
if (comp_nodes[1] == old)
|
|
return (replace_last(n, cleanup, remove_from_cleanuplist) != nullptr);
|
|
return false;
|
|
}
|
|
|
|
// returns true if the "replaced" node has been deleted (it was added with cleanup=true)
|
|
template<typename T>
|
|
bool changeFirst(T* n, bool cleanup=false) {
|
|
bool r=false;
|
|
ff_comb* c = getFirstComb();
|
|
ff_node* first = getFirst();
|
|
|
|
ssize_t pos=-1;
|
|
for(size_t i=0;i<cleanup_stages.size();++i)
|
|
if (cleanup_stages[i] == first) { pos=i; break;}
|
|
if (pos>=0) {
|
|
cleanup_stages.erase(cleanup_stages.begin()+pos);
|
|
r = true;
|
|
}
|
|
c->replace_first(n, cleanup, false);
|
|
if (r) delete first;
|
|
return r;
|
|
}
|
|
// returns true if the "replaced" node has been deleted (it was added with cleanup=true)
|
|
template<typename T>
|
|
bool changeLast(T* n, bool cleanup=false) {
|
|
bool r=false;
|
|
ff_comb* c = getLastComb();
|
|
ff_node* last = getLast();
|
|
|
|
ssize_t pos=-1;
|
|
for(size_t i=0;i<cleanup_stages.size();++i)
|
|
if (cleanup_stages[i] == last) { pos=i; break;}
|
|
if (pos>=0) {
|
|
cleanup_stages.erase(cleanup_stages.begin()+pos);
|
|
r = true;
|
|
}
|
|
c->replace_last(n, cleanup, false);
|
|
if (r) delete last;
|
|
return r;
|
|
}
|
|
|
|
double ffTime() {
|
|
return diffmsec(getstoptime(),getstarttime());
|
|
}
|
|
double ffwTime() {
|
|
return diffmsec(getwstoptime(),getwstartime());
|
|
}
|
|
|
|
#if defined(TRACE_FASTFLOW)
|
|
void ffStats(std::ostream & out) {
|
|
out << "--- Comp:\n";
|
|
if (comp_nodes[0]->isMultiInput()) {
|
|
ff_minode::ffStats(out);
|
|
} else
|
|
ff_node::ffStats(out);
|
|
}
|
|
#else
|
|
void ffStats(std::ostream & out) {
|
|
out << "FastFlow trace not enabled\n";
|
|
}
|
|
#endif
|
|
|
|
#ifdef DFF_ENABLED
|
|
virtual bool isSerializable(){ return comp_nodes[1]->isSerializable(); }
|
|
virtual bool isDeserializable(){ return comp_nodes[0]->isDeserializable(); }
|
|
virtual std::pair<decltype(serializeF), decltype(freetaskF)> getSerializationFunction(){ return comp_nodes[1]->getSerializationFunction(); }
|
|
virtual std::pair<decltype(deserializeF), decltype(alloctaskF)> getDeserializationFunction(){ return comp_nodes[0]->getDeserializationFunction(); }
|
|
|
|
#endif
|
|
|
|
protected:
|
|
ff_comb():ff_minode() {}
|
|
|
|
template<typename T1, typename T2>
|
|
inline bool check(T1* n1, T2* n2) {
|
|
if (n1->isFarm() || n1->isAll2All() || n1->isPipe() ||
|
|
n2->isFarm() || n2->isAll2All() || n2->isPipe()) {
|
|
error("COMBINE, input nodes cannot be farm, all-2-all or pipeline building-blocks\n");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
template<typename T1, typename T2>
|
|
inline bool check(T1& n1, T2& n2) {
|
|
return check(&n1, &n2);
|
|
}
|
|
void add_node(ff_node* n1, ff_node* n2) {
|
|
if (!check(n1, n2)) return;
|
|
n1->registerCallback(n2->ff_send_out_comp, n2);
|
|
comp_nodes.push_back(n1);
|
|
comp_nodes.push_back(n2);
|
|
}
|
|
template<typename T1>
|
|
void add_node(const T1& n1, ff_node* n2) {
|
|
T1 *node1 = new T1(n1);
|
|
assert(node1);
|
|
if (!check(node1, n2)) return;
|
|
cleanup_stages.push_back(node1);
|
|
comp_nodes.push_back(node1);
|
|
comp_nodes.push_back(n2);
|
|
}
|
|
template<typename T1, typename T2>
|
|
void add_node(T1& n1, T2& n2) {
|
|
if (!check(&n1, &n2)) return;
|
|
n1.registerCallback(n2.ff_send_out_comp, &n2);
|
|
comp_nodes.push_back(&n1);
|
|
comp_nodes.push_back(&n2);
|
|
}
|
|
template<typename T1, typename T2>
|
|
void add_node(const T1& n1, const T2& n2) {
|
|
T1 *node1 = new T1(n1);
|
|
T2 *node2 = new T2(n2);
|
|
assert(node1 && node2);
|
|
cleanup_stages.push_back(node1);
|
|
cleanup_stages.push_back(node2);
|
|
add_node(*node1, *node2);
|
|
}
|
|
template<typename T1, typename T2>
|
|
void add_node(T1& n1, const T2& n2) {
|
|
T2 *node2 = new T2(n2);
|
|
assert(node2);
|
|
cleanup_stages.push_back(node2);
|
|
add_node(n1, *node2);
|
|
}
|
|
template<typename T1, typename T2>
|
|
void add_node(const T1& n1, T2& n2) {
|
|
T1 *node1 = new T1(n1);
|
|
assert(node1);
|
|
cleanup_stages.push_back(node1);
|
|
add_node(*node1, n2);
|
|
}
|
|
|
|
void skipfirstpop(bool sk) {
|
|
getFirst()->skipfirstpop(sk);
|
|
ff_node::skipfirstpop(sk);
|
|
}
|
|
|
|
#ifdef DFF_ENABLED
|
|
void skipallpop(bool sk) {
|
|
getFirst()->skipallpop(sk);
|
|
ff_node::skipallpop(sk);
|
|
}
|
|
#endif
|
|
|
|
|
|
bool put(void * ptr) {
|
|
return ff_node::put(ptr);
|
|
}
|
|
// returns the innermost combine on the left-hand side
|
|
ff_comb* getFirstComb() {
|
|
if (comp_nodes[0]->isComp())
|
|
return ((ff_comb*)comp_nodes[0])->getFirstComb();
|
|
return this;
|
|
}
|
|
// returns the innermost combine on the right-hand side
|
|
ff_comb* getLastComb() {
|
|
if (comp_nodes[1]->isComp())
|
|
return ((ff_comb*)comp_nodes[1])->getLastComb();
|
|
return this;
|
|
}
|
|
|
|
void registerCallback(bool (*cb)(void *,int,unsigned long,unsigned long,void *), void * arg) {
|
|
comp_nodes[1]->registerCallback(cb,arg);
|
|
}
|
|
|
|
void connectCallback() {
|
|
if (comp_nodes[0]->isComp())
|
|
((ff_comb*)comp_nodes[0])->connectCallback();
|
|
if (comp_nodes[1]->isComp())
|
|
((ff_comb*)comp_nodes[1])->connectCallback();
|
|
|
|
svector<ff_node*> w1(1);
|
|
svector<ff_node*> w2(1);
|
|
comp_nodes[0]->get_out_nodes(w1);
|
|
comp_nodes[1]->get_in_nodes(w2);
|
|
if (w1.size() == 0 && w2.size() == 0) return;
|
|
if (w1.size()>1 || w2.size()>1) {
|
|
error("COMP, connecting callbacks\n");
|
|
return;
|
|
}
|
|
|
|
ff_node *n1 = (w1.size() == 0)? comp_nodes[0]:w1[0];
|
|
n1->registerCallback(this->ff_send_out_comp, this);
|
|
}
|
|
|
|
int dryrun() {
|
|
if (prepared) return 0;
|
|
if (comp_nodes[0]->dryrun()<0) return -1;
|
|
if (comp_nodes[1]->dryrun()<0) return -1;
|
|
return 0;
|
|
}
|
|
int prepare() {
|
|
if (prepared) return 0;
|
|
connectCallback();
|
|
|
|
// checking if the first node is a multi-input node
|
|
ff_node *n1 = getFirst();
|
|
if (n1->isMultiInput()) {
|
|
// here we substitute the gt
|
|
((ff_minode*)n1)->setgt(ff_minode::getgt());
|
|
}
|
|
// dryrun should be executed here because the gt of the
|
|
// first node might have been substituted
|
|
ff_comb::dryrun();
|
|
|
|
// registering a special callback if the last stage does
|
|
// not have an output channel
|
|
ff_node *n2 = getLast();
|
|
if (n2->isMultiOutput()) {
|
|
svector<ff_node*> w(1);
|
|
n2->get_out_nodes(w);
|
|
if ((w.size()==0) && (n2->callback == nullptr))
|
|
n2->registerCallback(devnull, nullptr); // devnull callback
|
|
} else
|
|
if ((n2->get_out_buffer() == nullptr) && (n2->callback == nullptr))
|
|
n2->registerCallback(devnull, nullptr); // devnull callback
|
|
|
|
|
|
prepared = true;
|
|
return 0;
|
|
}
|
|
|
|
void set_multiinput() {
|
|
// see farm.hpp
|
|
// when the composition is passed as filter of a farm collector (which is by
|
|
// default a multi-input node) the filter is seen as multi-input because we want
|
|
// to avoid calling eosnotify multiple times (see ff_comb::eosnotify)
|
|
// The same applies for the farm emitter.
|
|
if (comp_nodes[0]->isComp())
|
|
return comp_nodes[0]->set_multiinput();
|
|
comp_multi_input=true;
|
|
}
|
|
|
|
void set_neos(ssize_t n) {
|
|
getFirst()->set_neos(n);
|
|
}
|
|
|
|
inline int cardinality(BARRIER_T * const barrier) {
|
|
ff_node::set_barrier(barrier);
|
|
return ff_minode::cardinality(barrier);
|
|
}
|
|
|
|
virtual void set_id(ssize_t id) {
|
|
myid = id;
|
|
if (comp_nodes.size()) {
|
|
for(size_t j=0;j<comp_nodes.size(); ++j) {
|
|
comp_nodes[j]->set_id(myid);
|
|
}
|
|
}
|
|
}
|
|
|
|
int svc_init() {
|
|
neos=0;
|
|
for(size_t j=0;j<comp_nodes.size(); ++j) {
|
|
int r;
|
|
if ((r=comp_nodes[j]->svc_init())<0) return r;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// main service function
|
|
void *svc(void *task) {
|
|
void *ret = FF_GO_ON;
|
|
void *r1;
|
|
|
|
if (comp_nodes[0]->isComp())
|
|
ret = comp_nodes[0]->svc(task);
|
|
else {
|
|
#ifdef DFF_ENABLED
|
|
if (task || comp_nodes[0]->skipfirstpop() || comp_nodes[0]->skipallpop()) {
|
|
#else
|
|
if (task || comp_nodes[0]->skipfirstpop()){
|
|
#endif
|
|
r1= comp_nodes[0]->svc(task);
|
|
if (!(r1 == FF_GO_ON || r1 == FF_GO_OUT || r1 == FF_EOS_NOFREEZE)) {
|
|
comp_nodes[0]->ff_send_out(r1);
|
|
}
|
|
if (r1 == FF_EOS)
|
|
ret=FF_GO_OUT;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
void svc_end() {
|
|
for(size_t j=0;j<comp_nodes.size(); ++j) {
|
|
comp_nodes[j]->svc_end();
|
|
}
|
|
}
|
|
|
|
// this is called by the ff_send_out for those nodes that are inside a combine
|
|
bool push_comp_local(void *task) {
|
|
if (task == FF_EOS) {
|
|
comp_nodes[1]->eosnotify();
|
|
propagateEOS();
|
|
return true;
|
|
}
|
|
void *r = comp_nodes[1]->svc(task);
|
|
if (r == FF_GO_ON || r== FF_GO_OUT || r == FF_EOS_NOFREEZE) return true;
|
|
if (r == FF_EOS) {
|
|
propagateEOS();
|
|
return true;
|
|
}
|
|
return comp_nodes[1]->ff_send_out(r);
|
|
}
|
|
|
|
int set_output(const svector<ff_node *> & w) {
|
|
return comp_nodes[1]->set_output(w);
|
|
}
|
|
int set_output(ff_node *n) {
|
|
return comp_nodes[1]->set_output(n);
|
|
}
|
|
int set_output_feedback(ff_node *n) {
|
|
return comp_nodes[1]->set_output_feedback(n);
|
|
}
|
|
int set_input(const svector<ff_node *> & w) {
|
|
//assert(comp_nodes[0]->isMultiInput());
|
|
if (comp_nodes[0]->set_input(w)<0) return -1;
|
|
// if the first node of the comp is a multi-input node
|
|
// we have to set the input of the current ff_minode that
|
|
// is implementing the composition
|
|
return ff_minode::set_input(w);
|
|
}
|
|
int set_input(ff_node *n) {
|
|
//assert(comp_nodes[0]->isMultiInput());
|
|
if (comp_nodes[0]->set_input(n)<0) return -1;
|
|
// if the first node of the comp is a multi-input node
|
|
// we have to set the input of the current ff_minode that
|
|
// is implementing the composition
|
|
return ff_minode::set_input(n);
|
|
}
|
|
int set_input_feedback(ff_node *n) {
|
|
//assert(comp_nodes[0]->isMultiInput());
|
|
if (comp_nodes[0]->set_input_feedback(n)<0) return -1;
|
|
// if the first node of the comp is a multi-input node
|
|
// we have to set the input of the current ff_minode that
|
|
// is implementing the composition
|
|
return ff_minode::set_input_feedback(n);
|
|
}
|
|
|
|
void blocking_mode(bool blk=true) {
|
|
blocking_in=blocking_out=blk;
|
|
ff_node *n = getLast();
|
|
if (n) n->blocking_mode(blocking_in);
|
|
}
|
|
|
|
void set_scheduling_ondemand(const int inbufferentries=1) {
|
|
if (!isMultiOutput()) return;
|
|
ff_node* n= getLast();
|
|
assert(n->isMultiOutput());
|
|
n->set_scheduling_ondemand(inbufferentries);
|
|
}
|
|
int ondemand_buffer() const {
|
|
if (!isMultiOutput()) return 0;
|
|
ff_node* n= getLast();
|
|
assert(n->isMultiOutput());
|
|
return n->ondemand_buffer();
|
|
}
|
|
|
|
void eosnotify(ssize_t id=-1) {
|
|
comp_nodes[0]->eosnotify(id);
|
|
|
|
++neos;
|
|
// if the first node is multi-input or is a comp passed as filter to a farm collector,
|
|
// then we have to call eosnotify only if we have received all EOSs
|
|
if (comp_nodes[0]->isMultiInput() || comp_multi_input) {
|
|
const ssize_t n=getFirst()->get_neos();
|
|
if (neos >= n)
|
|
comp_nodes[1]->eosnotify(id);
|
|
return;
|
|
}
|
|
comp_nodes[1]->eosnotify(id);
|
|
}
|
|
|
|
void propagateEOS(void *task=FF_EOS) {
|
|
if (comp_nodes[1]->isComp()) {
|
|
comp_nodes[1]->propagateEOS(task);
|
|
return;
|
|
}
|
|
|
|
if (comp_nodes[1]->isMultiOutput())
|
|
comp_nodes[1]->propagateEOS(task);
|
|
else
|
|
comp_nodes[1]->ff_send_out(task);
|
|
}
|
|
|
|
void get_out_nodes(svector<ff_node*>&w) {
|
|
size_t len=w.size();
|
|
comp_nodes[1]->get_out_nodes(w);
|
|
if (len == w.size() && !comp_nodes[1]->isComp())
|
|
w.push_back(comp_nodes[1]);
|
|
}
|
|
void get_in_nodes(svector<ff_node*>&w) {
|
|
size_t len=w.size();
|
|
comp_nodes[0]->get_in_nodes(w);
|
|
if (len == w.size() && !comp_nodes[0]->isComp())
|
|
w.push_back(comp_nodes[0]);
|
|
}
|
|
|
|
void get_in_nodes_feedback(svector<ff_node*>&w) {
|
|
comp_nodes[0]->get_in_nodes_feedback(w);
|
|
}
|
|
|
|
int create_input_buffer(int nentries, bool fixedsize=FF_FIXED_SIZE) {
|
|
if (isMultiInput()) {
|
|
int r= ff_minode::create_input_buffer(nentries,fixedsize);
|
|
if (r<0) return r;
|
|
svector<ff_node*> w(1);
|
|
ff_minode::get_in_nodes(w);
|
|
assert(w.size()==1);
|
|
r=ff_node::set_input_buffer(w[0]->get_in_buffer());
|
|
return r;
|
|
}
|
|
int r = ff_node::create_input_buffer(nentries,fixedsize);
|
|
if (r<0) return r;
|
|
r = getFirst()->set_input_buffer(ff_node::get_in_buffer());
|
|
return r;
|
|
}
|
|
int create_output_buffer(int nentries, bool fixedsize=FF_FIXED_SIZE) {
|
|
return comp_nodes[1]->create_output_buffer(nentries,fixedsize);
|
|
}
|
|
FFBUFFER * get_in_buffer() const {
|
|
//if (getFirst()->isMultiInput()) return nullptr;
|
|
return ff_node::get_in_buffer();
|
|
}
|
|
|
|
int set_output_buffer(FFBUFFER * const o) {
|
|
return comp_nodes[1]->set_output_buffer(o);
|
|
}
|
|
|
|
// a composition can be passed as filter to a farm emitter
|
|
void setlb(ff_loadbalancer *elb, bool cleanup=false) {
|
|
comp_nodes[1]->setlb(elb, cleanup);
|
|
}
|
|
// a composition can be passed as filter to a farm collector
|
|
void setgt(ff_gatherer *egt, bool cleanup=false) {
|
|
comp_nodes[0]->setgt(egt, cleanup);
|
|
ff_minode::setgt(egt, cleanup);
|
|
}
|
|
|
|
// consumer
|
|
bool init_input_blocking(pthread_mutex_t *&m,
|
|
pthread_cond_t *&c,
|
|
bool /*feedback*/=true) {
|
|
ff_node *n = getFirst();
|
|
if (n->isMultiInput()) {
|
|
// inits local gt, which is used for gathering tasks....
|
|
bool r= ff_minode::init_input_blocking(m,c);
|
|
if (!r) return false;
|
|
// ... then, sets all p_cons_* on all input channels
|
|
svector<ff_node*> w(1);
|
|
n->get_in_nodes(w);
|
|
n->get_in_nodes_feedback(w);
|
|
for(size_t i=0;i<w.size(); ++i)
|
|
w[i]->set_output_blocking(m,c);
|
|
return true;
|
|
}
|
|
bool r = ff_node::init_input_blocking(m,c);
|
|
if (!r) return false;
|
|
// if the first node is a standard node or a multi-output node
|
|
// then the comb node and the first node share the same
|
|
// cond variable. This is due to the put_done method in the lb
|
|
// (i.e. the prev node is a multi-output or an emitter node)
|
|
assert(n->cons_m == nullptr);
|
|
n->set_cons_c(c);
|
|
//n->cons_c = c; n->cons_m = nullptr; <---- TOGLIERE
|
|
return true;
|
|
}
|
|
// producer
|
|
bool init_output_blocking(pthread_mutex_t *&m,
|
|
pthread_cond_t *&c,
|
|
bool /*feedback*/=true) {
|
|
return comp_nodes[1]->init_output_blocking(m,c);
|
|
}
|
|
void set_output_blocking(pthread_mutex_t *&m,
|
|
pthread_cond_t *&c,
|
|
bool canoverwrite=false) {
|
|
comp_nodes[1]->set_output_blocking(m,c, canoverwrite);
|
|
}
|
|
|
|
// the following calls are needed because a composition
|
|
// uses as output channel(s) the one(s) of the second node.
|
|
// these functions should not be called if the node is multi-output
|
|
inline bool get(void **ptr) { return comp_nodes[1]->get(ptr);}
|
|
inline pthread_cond_t &get_cons_c() {
|
|
ff_node *n = getFirst();
|
|
if (n->isMultiInput()) return ff_minode::get_cons_c();
|
|
return ff_node::get_cons_c();
|
|
}
|
|
|
|
FFBUFFER *get_out_buffer() const {
|
|
if (getLast()->isMultiOutput()) return nullptr;
|
|
return comp_nodes[1]->get_out_buffer();
|
|
}
|
|
inline bool ff_send_out(void * task, int id=-1,
|
|
unsigned long retry=((unsigned long)-1),
|
|
unsigned long ticks=(ff_node::TICKS2WAIT)) {
|
|
return comp_nodes[1]->ff_send_out(task,id,retry,ticks);
|
|
}
|
|
|
|
inline bool ff_send_out_to(void * task,int id, unsigned long retry=((unsigned long)-1),
|
|
unsigned long ticks=(ff_node::TICKS2WAIT)) {
|
|
return comp_nodes[1]->ff_send_out(task,id,retry,ticks);
|
|
}
|
|
|
|
const struct timeval getstarttime() const {
|
|
if (comp_nodes[0]->isMultiInput()) return ff_minode::getstarttime();
|
|
return ff_node::getstarttime();
|
|
}
|
|
const struct timeval getstoptime() const {
|
|
if (comp_nodes[0]->isMultiInput()) return ff_minode::getstoptime();
|
|
return ff_node::getstoptime();
|
|
}
|
|
const struct timeval getwstartime() const {
|
|
if (comp_nodes[0]->isMultiInput()) return ff_minode::getwstartime();
|
|
return ff_node::getwstartime();
|
|
|
|
}
|
|
const struct timeval getwstoptime() const {
|
|
if (comp_nodes[0]->isMultiInput()) return ff_minode::getwstoptime();
|
|
return ff_node::getwstoptime();
|
|
}
|
|
|
|
private:
|
|
svector<ff_node*> comp_nodes;
|
|
svector<ff_node*> cleanup_stages;
|
|
bool comp_multi_input = false;
|
|
ssize_t neos=0;
|
|
};
|
|
|
|
/*
|
|
* Type-preserving combiner building block
|
|
*
|
|
*/
|
|
template <typename TIN, typename T, typename TOUT>
|
|
struct ff_comb_t: ff_comb {
|
|
typedef TIN IN_t;
|
|
typedef T T_t;
|
|
typedef TOUT OUT_t;
|
|
|
|
ff_comb_t(ff_node_t<TIN, T>* n1, ff_node_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
ff_comb_t(ff_node_t<TIN, T>* n1, ff_minode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
ff_comb_t(ff_node_t<TIN, T>* n1, ff_monode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
template<typename S>
|
|
ff_comb_t(ff_node_t<TIN, T>* n1, ff_comb_t<T, S, TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
|
|
|
|
ff_comb_t(ff_minode_t<TIN, T>* n1, ff_node_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
ff_comb_t(ff_minode_t<TIN, T>* n1, ff_minode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
ff_comb_t(ff_minode_t<TIN, T>* n1, ff_monode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
template<typename S>
|
|
ff_comb_t(ff_minode_t<TIN, T>* n1, ff_comb_t<T, S, TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
|
|
ff_comb_t(ff_monode_t<TIN, T>* n1, ff_node_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
ff_comb_t(ff_monode_t<TIN, T>* n1, ff_minode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
ff_comb_t(ff_monode_t<TIN, T>* n1, ff_monode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
template<typename S>
|
|
ff_comb_t(ff_monode_t<TIN, T>* n1, ff_comb_t<T, S, TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
|
|
template<typename S>
|
|
ff_comb_t(ff_comb_t<TIN, S, T>* n1, ff_node_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
template<typename S>
|
|
ff_comb_t(ff_comb_t<TIN, S, T>* n1, ff_minode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
template<typename S>
|
|
ff_comb_t(ff_comb_t<TIN, S, T>* n1, ff_monode_t<T,TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
template<typename S, typename W>
|
|
ff_comb_t(ff_comb_t<TIN, S, T>* n1, ff_comb_t<T, W, TOUT>* n2, bool cleanup1=false, bool cleanup2=false):
|
|
ff_comb(n1,n2,cleanup1,cleanup2) {}
|
|
};
|
|
|
|
|
|
/* *************************************************************************** *
|
|
* *
|
|
* helper functions *
|
|
* *
|
|
* *************************************************************************** */
|
|
|
|
/**
|
|
* combines either basic nodes or ff_comb(s)
|
|
*
|
|
*/
|
|
template<typename T1, typename T2>
|
|
static inline const ff_comb combine_nodes(T1& n1, T2& n2) {
|
|
ff_comb comp;
|
|
comp.add_node(n1,n2);
|
|
return comp;
|
|
}
|
|
|
|
/**
|
|
* combines either basic nodes or ff_comb(s) and returns a unique_ptr
|
|
* useful to add ff_comb as farm's workers
|
|
*/
|
|
template<typename T1, typename T2>
|
|
static inline std::unique_ptr<ff_node> unique_combine_nodes(T1& n1, T2& n2) {
|
|
ff_comb *c = new ff_comb;
|
|
assert(c);
|
|
std::unique_ptr<ff_node> comp(c);
|
|
if (!c->check(n1,n2)) return comp;
|
|
c->add_node(n1,n2);
|
|
return comp;
|
|
}
|
|
|
|
/**
|
|
* combines two stages returning a pipeline:
|
|
* - node1 and node2 standard nodes (or ff_comb) --> pipeline(node1, node2)
|
|
* - node1 standard node and node2 is a farm --> pipeline(node2) (node1 is merged with node2's emitter)
|
|
* - node1 is a farm and node2 is a standard node --> pipeline(node1) (node2 is merged with node1's collector)
|
|
* - node1 and node2 are both farms --> pipeline(node1, node2) (collector is merged with emitter -- see case4.2 of combine_farms)
|
|
* (NOTE: if node1 is an ordered farm, then its collector is not removed)
|
|
*/
|
|
static inline const ff_pipeline combine_nodes_in_pipeline(ff_node& node1, ff_node& node2, bool cleanup1=false, bool cleanup2=false) {
|
|
if (node1.isAll2All() || node2.isAll2All()) {
|
|
error("combine_nodes_in_pipeline, cannot be used if one of the nodes is A2A\n");
|
|
return ff_pipeline();
|
|
}
|
|
if (node1.isOFarm()) {
|
|
if (node2.isFarm()) {
|
|
ff_farm *farm1 = reinterpret_cast<ff_farm*>(&node1);
|
|
ff_farm *farm2 = reinterpret_cast<ff_farm*>(&node2);
|
|
if (cleanup1) farm1->cleanup_all();
|
|
if (cleanup2) farm2->cleanup_all();
|
|
return combine_ofarm_farm(*farm1, *farm2);
|
|
}
|
|
error("combine_nodes_in_pipeline, FEATURE NOT YET SUPPORTED (node1 ordered farm and node2 standard or combine node\n");
|
|
abort(); // FIX: TODO <---------
|
|
return ff_pipeline();
|
|
}
|
|
if (!node1.isFarm() && !node2.isFarm()) { // two sequential nodes
|
|
ff_pipeline pipe;
|
|
pipe.add_stage(&node1, cleanup1);
|
|
pipe.add_stage(&node2, cleanup2);
|
|
return pipe;
|
|
} else if (!node1.isFarm() && node2.isFarm()) { // seq with farm's emitter
|
|
ff_pipeline pipe;
|
|
ff_farm* farm = reinterpret_cast<ff_farm*>(&node2);
|
|
ff_node *e = farm->getEmitter();
|
|
if (!e) farm->add_emitter(&node1);
|
|
else {
|
|
ff_comb *p;
|
|
if (!e->isMultiOutput()) { // we have to transform the emitter node into a multi-output
|
|
if (e->isMultiInput()) { // this is a "strange" case: the emitter is multi-input without being also multi-output (through a combine node)
|
|
struct hnode:ff_monode {
|
|
void* svc(void*in) {return in;}
|
|
};
|
|
|
|
// c is a multi-input AND multi-output node
|
|
ff_comb *c = new ff_comb(e, new hnode,
|
|
farm->isset_cleanup_emitter(), true);
|
|
assert(c);
|
|
p = new ff_comb(&node1, c, cleanup1, true);
|
|
} else {
|
|
auto mo = new internal_mo_transformer(e, farm->isset_cleanup_emitter());
|
|
assert(mo);
|
|
p = new ff_comb(&node1, mo, cleanup1, true);
|
|
}
|
|
} else {
|
|
p = new ff_comb(&node1, e, cleanup1, farm->isset_cleanup_emitter());
|
|
}
|
|
assert(p);
|
|
if (farm->isset_cleanup_emitter()) farm->cleanup_emitter(false);
|
|
farm->change_emitter(p, true);
|
|
}
|
|
pipe.add_stage(farm, cleanup2);
|
|
return pipe;
|
|
} else if (node1.isFarm() && !node2.isFarm()) { // first farm and seq
|
|
ff_pipeline pipe;
|
|
ff_farm* farm = reinterpret_cast<ff_farm*>(&node1);
|
|
ff_node *c = farm->getCollector();
|
|
if (!c) farm->add_collector(&node2, cleanup2);
|
|
else {
|
|
ff_comb *p = new ff_comb(c, &node2, farm->isset_cleanup_collector(), cleanup2);
|
|
if (farm->isset_cleanup_collector()) farm->cleanup_collector(false);
|
|
farm->remove_collector();
|
|
farm->add_collector(p, true);
|
|
}
|
|
pipe.add_stage(farm, cleanup1);
|
|
return pipe;
|
|
}
|
|
assert(node1.isFarm() && node2.isFarm());
|
|
ff_farm* farm1 = reinterpret_cast<ff_farm*>(&node1);
|
|
ff_farm* farm2 = reinterpret_cast<ff_farm*>(&node2);
|
|
|
|
ff_node *e = farm2->getEmitter();
|
|
ff_node *c = farm1->getCollector();
|
|
ff_pipeline pipe;
|
|
if (c) {
|
|
ff_comb *p = new ff_comb(c,e,
|
|
farm1->isset_cleanup_collector(),
|
|
farm2->isset_cleanup_emitter());
|
|
if (farm1->isset_cleanup_collector()) farm1->cleanup_collector(false);
|
|
if (farm2->isset_cleanup_emitter()) farm2->cleanup_emitter(false);
|
|
farm2->change_emitter(p, true);
|
|
}
|
|
farm1->remove_collector();
|
|
pipe.add_stage(farm1, cleanup1);
|
|
pipe.add_stage(farm2, cleanup2);
|
|
return pipe;
|
|
}
|
|
|
|
|
|
/**
|
|
* It combines two farms where farm1 has a default collector and
|
|
* farm2 has a default emitter node. It produces a new farm whose
|
|
* worker is an all-to-all building block.
|
|
*
|
|
*/
|
|
static inline const ff_farm combine_farms_a2a(ff_farm& farm1, ff_farm& farm2) {
|
|
ff_farm newfarm;
|
|
|
|
if (farm1.getCollector() != nullptr) {
|
|
error("combine_farms, first farm has a non-default collector\n");
|
|
return newfarm;
|
|
}
|
|
|
|
if (farm2.getEmitter() != nullptr) {
|
|
error("ff_comb, second farm has a non-default emitter, use: combine_farm(farm1, emitter2, farm2)\n");
|
|
return newfarm;
|
|
}
|
|
|
|
ff_a2a *a2a = new ff_a2a;
|
|
assert(a2a);
|
|
|
|
const svector<ff_node *> & w1= farm1.getWorkers();
|
|
const svector<ff_node *> & w2= farm2.getWorkers();
|
|
|
|
std::vector<ff_node*> W1(w1.size());
|
|
std::vector<ff_node*> W2(w2.size());
|
|
for(size_t i=0;i<W1.size();++i) W1[i]=w1[i];
|
|
for(size_t i=0;i<W2.size();++i) W2[i]=w2[i];
|
|
|
|
ff_node* emitter1 = farm1.getEmitter();
|
|
if (emitter1) {
|
|
newfarm.add_emitter(emitter1);
|
|
if (farm1.isset_cleanup_emitter()) {
|
|
newfarm.cleanup_emitter(true);
|
|
farm1.cleanup_emitter(false);
|
|
}
|
|
}
|
|
ff_node* collector2 = farm2.getCollector();
|
|
if (farm2.hasCollector()) {
|
|
newfarm.add_collector(collector2);
|
|
if (farm2.isset_cleanup_collector()) {
|
|
newfarm.cleanup_collector(true);
|
|
farm2.cleanup_collector(false);
|
|
}
|
|
}
|
|
if (farm2.isset_cleanup_collector()) farm2.cleanup_collector(false);
|
|
|
|
a2a->add_firstset(W1, farm2.ondemand_buffer(), farm1.isset_cleanup_workers());
|
|
a2a->add_secondset(W2, farm2.isset_cleanup_workers());
|
|
if (farm1.isset_cleanup_workers()) farm1.cleanup_workers(false);
|
|
if (farm2.isset_cleanup_workers()) farm2.cleanup_workers(false);
|
|
|
|
std::vector<ff_node*> W;
|
|
W.push_back(a2a);
|
|
newfarm.add_workers(W);
|
|
if (farm1.ondemand_buffer())
|
|
newfarm.set_scheduling_ondemand(farm1.ondemand_buffer());
|
|
newfarm.cleanup_workers();
|
|
|
|
return newfarm;
|
|
}
|
|
|
|
/**
|
|
* It combines two farms so that the new farm produced has a single worker
|
|
* that is an all-to-all building block.
|
|
* The node passed as second parameter is composed with each worker of
|
|
* the first set of workers.
|
|
*
|
|
*/
|
|
template<typename E_t>
|
|
static inline const ff_farm combine_farms_a2a(ff_farm &farm1, const E_t& node, ff_farm &farm2) {
|
|
ff_farm newfarm;
|
|
|
|
ff_a2a *a2a = new ff_a2a;
|
|
assert(a2a);
|
|
|
|
const svector<ff_node *> & w1= farm1.getWorkers();
|
|
const svector<ff_node *> & w2= farm2.getWorkers();
|
|
|
|
std::vector<ff_node*> W1(w1.size());
|
|
std::vector<ff_node*> W2(w2.size());
|
|
for(size_t i=0;i<W1.size();++i) W1[i]=w1[i];
|
|
for(size_t i=0;i<W2.size();++i) W2[i]=w2[i];
|
|
|
|
ff_node* emitter1 = farm1.getEmitter();
|
|
if (emitter1) newfarm.add_emitter(emitter1);
|
|
ff_node* collector2 = farm2.getCollector();
|
|
if (farm2.hasCollector()) newfarm.add_collector(collector2);
|
|
|
|
std::vector<ff_node*> Wtmp(W1.size());
|
|
for(size_t i=0;i<W1.size();++i) {
|
|
if (!node.isMultiOutput()) {
|
|
auto mo = new internal_mo_transformer(node);
|
|
assert(mo);
|
|
auto pc = new ff_comb(W1[i], mo,
|
|
farm1.isset_cleanup_workers() , true);
|
|
assert(pc);
|
|
Wtmp[i]=pc;
|
|
} else {
|
|
auto e = new E_t(node);
|
|
auto pc = new ff_comb(W1[i], e,
|
|
farm1.isset_cleanup_workers(), true);
|
|
assert(pc);
|
|
Wtmp[i]=pc;
|
|
}
|
|
}
|
|
a2a->add_firstset(Wtmp, farm2.ondemand_buffer(), true); // cleanup set to true
|
|
a2a->add_secondset(W2, farm2.isset_cleanup_workers());
|
|
if (farm2.isset_cleanup_workers()) farm2.cleanup_workers(false);
|
|
|
|
std::vector<ff_node*> W;
|
|
W.push_back(a2a);
|
|
newfarm.add_workers(W);
|
|
newfarm.cleanup_workers(); // a2a will be delated at the end
|
|
if (farm1.ondemand_buffer())
|
|
newfarm.set_scheduling_ondemand(farm1.ondemand_buffer());
|
|
|
|
return newfarm;
|
|
}
|
|
|
|
/*
|
|
* This function produced the NF of two farms having the same n. of workers.
|
|
* If the farms are ordered farm they must have the same ondemand buffer and
|
|
* the same ordering memory size.
|
|
*/
|
|
static inline const ff_farm combine_farms_nf(ff_farm& farm1, ff_farm& farm2) {
|
|
ff_farm newfarm;
|
|
|
|
if (farm1.getNWorkers() != farm2.getNWorkers()) {
|
|
error("combine_farms_nf, cannot combine farms with different number of workers\n");
|
|
return newfarm;
|
|
}
|
|
if (farm1.isOFarm() ^ farm2.isOFarm()) {
|
|
error("combine_farms_nf, if one of the two farms is ordered both must be ordered\n");
|
|
return newfarm;
|
|
}
|
|
|
|
if (farm1.isOFarm() && farm2.isOFarm()) {
|
|
if (farm1.ondemand_buffer() != farm2.ondemand_buffer()) {
|
|
error("combine_farms_nf, cannot combine ordered farms with different ondemand buffer\n");
|
|
return newfarm;
|
|
}
|
|
if (farm1.ordering_memory_size()!=farm2.ordering_memory_size()) {
|
|
error("combine_farms_nf, cannot combine ordered farms with different memory size\n");
|
|
return newfarm;
|
|
}
|
|
}
|
|
const svector<ff_node *> & w1= farm1.getWorkers();
|
|
const svector<ff_node *> & w2= farm2.getWorkers();
|
|
|
|
if (w1[0]->isMultiOutput() || w2[0]->isMultiInput()) { // NOTE: we suppose homogeneous workers
|
|
error("combine_farms_nf, cannot combine farms whose workers are either multi-output or multi-input nodes\n");
|
|
return newfarm;
|
|
}
|
|
if (w1[0]->isPipe() || w2[0]->isPipe()) { // NOTE: we suppose homogeneous workers
|
|
error("combine_farms_nf, cannot combine farms whose workers are pipeline nodes\n");
|
|
return newfarm;
|
|
}
|
|
std::vector<ff_node*> W1(w1.size());
|
|
std::vector<ff_node*> W2(w2.size());
|
|
for(size_t i=0;i<W1.size();++i) W1[i]=w1[i];
|
|
for(size_t i=0;i<W2.size();++i) W2[i]=w2[i];
|
|
|
|
ff_node* emitter1 = farm1.getEmitter();
|
|
if (emitter1) {
|
|
newfarm.add_emitter(emitter1);
|
|
if (farm1.isset_cleanup_emitter()) {
|
|
newfarm.cleanup_emitter(true);
|
|
farm1.cleanup_emitter(false);
|
|
}
|
|
}
|
|
ff_node* collector2 = farm2.getCollector();
|
|
if (farm2.hasCollector()) {
|
|
newfarm.add_collector(collector2);
|
|
if (farm2.isset_cleanup_collector()) {
|
|
newfarm.cleanup_collector(true);
|
|
farm2.cleanup_collector(false);
|
|
}
|
|
}
|
|
|
|
std::vector<ff_node*> Wtmp1(W1.size());
|
|
for(size_t i=0;i<W1.size();++i) {
|
|
auto pc = new ff_comb(W1[i], W2[i],
|
|
farm1.isset_cleanup_workers(),
|
|
farm2.isset_cleanup_workers());
|
|
assert(pc);
|
|
Wtmp1[i]=pc;
|
|
}
|
|
newfarm.add_workers(Wtmp1);
|
|
newfarm.cleanup_workers();
|
|
if (farm1.isset_cleanup_workers()) farm1.cleanup_workers(false);
|
|
if (farm2.isset_cleanup_workers()) farm2.cleanup_workers(false);
|
|
if (farm1.ondemand_buffer())
|
|
newfarm.set_scheduling_ondemand(farm1.ondemand_buffer());
|
|
if (farm1.isOFarm() || farm2.isOFarm())
|
|
newfarm.set_ordered(farm1.ordering_memory_size());
|
|
return newfarm;
|
|
}
|
|
|
|
/*
|
|
*
|
|
* This function allows to combine two farms where the first one is an ordered farm.
|
|
*
|
|
*
|
|
*/
|
|
static inline const ff_pipeline combine_ofarm_farm(ff_farm& farm1, ff_farm& farm2) {
|
|
ff_pipeline newpipe;
|
|
if (!farm1.isOFarm()) {
|
|
error("combine_ofarm_farm, the first farm is not an ordered farm");
|
|
return newpipe;
|
|
}
|
|
// here it would be possible to call directly the combine_farms_nf function but
|
|
// since this kind of transformation may violates the ordering semantics,
|
|
// the user must call it explicitly
|
|
if (farm2.isOFarm() && farm1.getNWorkers() == farm2.getNWorkers()) {
|
|
error("combine_ofarm_farm, two ordered farms with the same cardinality, the function cambine_farms_nf must be called explicitly\n");
|
|
//newpipe.add_stage(combine_farms_nf(farm1,farm2));
|
|
return newpipe;
|
|
}
|
|
// here we have that the first farm is an ordered farm and the second farm
|
|
// is either a standard farm or is an ordered farm with a number of workers
|
|
// that is different from the one of the first farm
|
|
|
|
ff_farm newfarm1;
|
|
|
|
if (farm1.ondemand_buffer())
|
|
newfarm1.set_scheduling_ondemand(farm1.ondemand_buffer());
|
|
|
|
ordered_lb* _lb= new ordered_lb(farm1.getNWorkers());
|
|
assert(_lb);
|
|
const size_t memsize = farm1.getNWorkers() * (2*newfarm1.ondemand_buffer()+3)+ DEF_OFARM_ONDEMAND_MEMORY;
|
|
newfarm1.ordered_resize_memory(memsize);
|
|
_lb->init(newfarm1.ordered_get_memory(), memsize);
|
|
newfarm1.setlb(_lb, true);
|
|
OrderedCollectorWrapper* cw = new OrderedCollectorWrapper(DEF_OFARM_ONDEMAND_MEMORY);
|
|
assert(cw);
|
|
|
|
// emitter1
|
|
ff_node* emitter1 = farm1.getEmitter();
|
|
if (emitter1) {
|
|
newfarm1.add_emitter(emitter1);
|
|
if (farm1.isset_cleanup_emitter()) {
|
|
newfarm1.cleanup_emitter(true);
|
|
farm1.cleanup_emitter(false);
|
|
}
|
|
}
|
|
// workers1
|
|
const svector<ff_node*>& w1= farm1.getWorkers();
|
|
std::vector<ff_node*> W1(w1.size());
|
|
for(size_t i=0;i<w1.size();++i) {
|
|
W1[i] = new OrderedWorkerWrapper(w1[i], farm1.isset_cleanup_workers());
|
|
assert(W1[i]);
|
|
}
|
|
if (farm1.isset_cleanup_workers())
|
|
farm1.cleanup_workers(false);
|
|
|
|
newfarm1.add_workers(W1);
|
|
newfarm1.cleanup_workers(true);
|
|
|
|
// collector1 + emitter2
|
|
ff_node* collector1 = farm1.getCollector();
|
|
ff_node* emitter2 = farm2.getEmitter();
|
|
if (!collector1 && !emitter2) {
|
|
farm2.change_emitter(cw, true);
|
|
} else {
|
|
if (!collector1) {
|
|
ff_comb *comb = new ff_comb(cw, emitter2,
|
|
true, farm2.isset_cleanup_emitter());
|
|
if (farm2.isset_cleanup_emitter())
|
|
farm2.cleanup_emitter(false);
|
|
|
|
farm2.change_emitter(comb, true);
|
|
} else {
|
|
if (!emitter2) {
|
|
ff_comb *comb = new ff_comb(cw, collector1,
|
|
true, farm1.isset_cleanup_collector());
|
|
if (farm1.isset_cleanup_collector())
|
|
farm1.cleanup_collector(false);
|
|
|
|
farm2.change_emitter(comb, true);
|
|
} else {
|
|
ff_comb *comb0 = new ff_comb(collector1,emitter2,
|
|
farm1.isset_cleanup_collector(),
|
|
farm2.isset_cleanup_emitter());
|
|
if (farm1.isset_cleanup_collector())
|
|
farm1.cleanup_collector(false);
|
|
if (farm2.isset_cleanup_emitter())
|
|
farm2.cleanup_emitter(false);
|
|
|
|
ff_comb *comb = new ff_comb(cw, comb0,
|
|
true, true);
|
|
farm2.change_emitter(comb, true);
|
|
}
|
|
}
|
|
}
|
|
newpipe.add_stage(newfarm1);
|
|
newpipe.add_stage(farm2);
|
|
|
|
return newpipe;
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
* This function allows to combine two farms in several different ways
|
|
* depending on the parameter passed to the function (node1,node2 and mergeCE):
|
|
*
|
|
* case1 - node1 and node2 are both null:
|
|
* 1. mergeCE==false: it produces a pipeline of a single farm
|
|
* whose worker is an all-to-all building block (equivalent behavior
|
|
* of calling combine_farms_a2a(farm1,farm2)
|
|
* 2. mergeCE==true: if the parallelism degree of the two farms is
|
|
* the same, it produces a pipeline of a single farm
|
|
* whose workers are a composition of both farm1 and farm2 workers (normal form).
|
|
* If the parallelism degree of the two farms is different, we fall back
|
|
* to the case1.1
|
|
*
|
|
* case2 - node1 is null and node2 is not null
|
|
* 1. mergeCE==false: it produces a pipeline of a single farm
|
|
* whose worker is an all-to-all building block where the nodes of the second set
|
|
* is a composition of node2 and farm2's workers
|
|
* 2. mergeCE==true: this produces a pipeline of two farms where the first
|
|
* farm has no collector while the second farm has as emitter the node2.
|
|
*
|
|
* case3 - node1 is not null and node2 is null
|
|
* 1. mergeCE==false: it produces a pipeline of a single farm
|
|
* whose worker is an all-to-all building block where the nodes of the first set
|
|
* is a composition of farm1's workers and node1
|
|
* 2. mergeCE==true: this produces a pipeline of two farms where the first
|
|
* farm has no collector while the second farm has as emitter the node1.
|
|
*
|
|
* case4 - both node1 and node2 are both not null
|
|
* 1. mergeCE==false: it produces a pipeline of a single farm
|
|
* whose worker is an all-to-all building block where the nodes of the first set
|
|
* is a composition of farm1's workers and node1 whereas the nodes of the second set
|
|
* is a composition of nodes2 and farm2's workers.
|
|
* 2. mergeCE==true: this produces a pipeline of two farms where the first
|
|
* farm has no collector while the second farm has as emitter the composition
|
|
* of node1 and node2.
|
|
*
|
|
* WARNING: farm1 and farm2 are passed by reference and they might be changed!
|
|
*/
|
|
template<typename E_t, typename C_t>
|
|
static inline const ff_pipeline combine_farms(ff_farm& farm1, const C_t *node1,
|
|
ff_farm& farm2, const E_t *node2,
|
|
bool mergeCE) {
|
|
ff_pipeline newpipe;
|
|
|
|
if (mergeCE) { // we have to merge nodes!!!
|
|
|
|
if (farm1.isOFarm() || farm2.isOFarm()) {
|
|
if (node1!=nullptr || node2!=nullptr) { // TODO
|
|
error("combine_farms, FEATURE NOT YET SUPPORTED, if at least one of the two farms is an ordered farm then node1 and node2 must be nullptr\n");
|
|
return newpipe;
|
|
}
|
|
if (farm1.getNWorkers() == farm2.getNWorkers()) {
|
|
// here it would be possible to call directly the combine_farms_nf function but
|
|
// since this kind of transformation may violates the ordering semantics,
|
|
// the user must call it explicitly
|
|
error("combine_farms, at least one of the two farms is ordered and they have the same cardinality, the function cambine_farms_nf must be called explicitly\n");
|
|
//newpipe.add_stage(combine_farms_nf(farm1,farm2));
|
|
return newpipe;
|
|
}
|
|
// the first farm is ordered
|
|
if (farm1.isOFarm()) {
|
|
auto pipe = combine_ofarm_farm(farm1, farm2);
|
|
return pipe;
|
|
}
|
|
// the second farm is ordered
|
|
// here we can just remove the collector of the first farm
|
|
farm1.remove_collector();
|
|
newpipe.add_stage(&farm1);
|
|
newpipe.add_stage(&farm2);
|
|
return newpipe;
|
|
}
|
|
|
|
if (node2==nullptr && node1==nullptr) {
|
|
if (farm1.getNWorkers() == farm2.getNWorkers()) { // case1.2
|
|
newpipe.add_stage(combine_farms_nf(farm1,farm2));
|
|
return newpipe;
|
|
}
|
|
// fall back to case1.1
|
|
// we cannot merge workers so we combine the two farms introducing
|
|
// the all-to-all building block
|
|
farm1.remove_collector();
|
|
farm2.change_emitter((ff_minode*)nullptr);
|
|
newpipe.add_stage(combine_farms_a2a(farm1,farm2));
|
|
return newpipe;
|
|
}
|
|
if (node2!=nullptr && node1!=nullptr) { // case4.2
|
|
if (node2->isComp() && !node2->isMultiOutput()) {
|
|
error("combine_farms, if node2 is a combine node, then it must be multi-output\n");
|
|
return newpipe;
|
|
}
|
|
// here we compose node1 and node2 and we set this new
|
|
// node as emitter of the second farm
|
|
|
|
// we require that the last stage of the combine is a multi-output node
|
|
if (!node2->isMultiOutput()) {
|
|
if (node2->isMultiInput()) { // this is a multi-input node
|
|
error("combine_farms, node2 is multi-input without being a combine, this is currently needed to apply the transformation (FEATURE NONT YET SUPPORTED)\n");
|
|
return newpipe;
|
|
}
|
|
auto second = new internal_mo_transformer(*node2);
|
|
assert(second);
|
|
auto first = new C_t(*node1);
|
|
assert(first);
|
|
auto p = new ff_comb(first, second, true, true);
|
|
assert(p);
|
|
farm2.change_emitter(p,true); // cleanup set
|
|
} else {
|
|
auto ec= combine_nodes(*node1, *node2);
|
|
auto pec = new decltype(ec)(ec);
|
|
farm2.change_emitter(pec,true); // cleanup set
|
|
}
|
|
farm1.remove_collector();
|
|
newpipe.add_stage(&farm1);
|
|
newpipe.add_stage(&farm2);
|
|
return newpipe;
|
|
}
|
|
if (node1 == nullptr) { // case2.2
|
|
assert(node2!=nullptr);
|
|
farm1.remove_collector();
|
|
farm2.change_emitter((ff_minode*)nullptr);
|
|
|
|
newpipe.add_stage(&farm1);
|
|
newpipe.add_stage(&farm2);
|
|
return newpipe;
|
|
}
|
|
assert(node1!=nullptr); // case3.2
|
|
if (node1->isMultiInput()) {
|
|
const struct hnode:ff_monode {
|
|
void* svc(void*in) {return in;}
|
|
} helper_node;
|
|
farm1.remove_collector();
|
|
const auto comp = combine_nodes(*node1, helper_node);
|
|
farm2.change_emitter(comp);
|
|
} else {
|
|
farm1.remove_collector();
|
|
farm2.change_emitter(const_cast<C_t*>(node1));
|
|
}
|
|
newpipe.add_stage(&farm1);
|
|
newpipe.add_stage(&farm2);
|
|
return newpipe;
|
|
|
|
}
|
|
// mergeCE is false
|
|
|
|
if (farm1.isOFarm() || farm2.isOFarm()) {
|
|
error("combine_farms, A2A cannot be introduced if one of the two farms is an ordered farms\n");
|
|
return newpipe;
|
|
}
|
|
|
|
if (node2==nullptr && node1==nullptr) { // case1.1
|
|
farm1.remove_collector();
|
|
farm2.change_emitter((ff_minode*)nullptr);
|
|
newpipe.add_stage(combine_farms_a2a(farm1,farm2));
|
|
return newpipe;
|
|
}
|
|
if (node2!=nullptr && node1==nullptr) {
|
|
newpipe.add_stage(combine_farms_a2a(farm1, *node2, farm2));
|
|
return newpipe;
|
|
}
|
|
if (node2==nullptr && node1!=nullptr) { // case3.1
|
|
ff_a2a *a2a = new ff_a2a;
|
|
if (a2a == nullptr) {
|
|
error("combine_farms, FATAL ERROR, not enough memory\n");
|
|
return newpipe;
|
|
}
|
|
ff_farm newfarm;
|
|
const svector<ff_node *> & w1= farm1.getWorkers();
|
|
const svector<ff_node *> & w2= farm2.getWorkers();
|
|
|
|
std::vector<ff_node*> W1(w1.size());
|
|
std::vector<ff_node*> W2(w2.size());
|
|
for(size_t i=0;i<W1.size();++i) W1[i]=w1[i];
|
|
for(size_t i=0;i<W2.size();++i) W2[i]=w2[i];
|
|
|
|
ff_node* emitter1 = farm1.getEmitter();
|
|
if (emitter1) {
|
|
newfarm.add_emitter(emitter1);
|
|
if (farm1.isset_cleanup_emitter()) {
|
|
newfarm.cleanup_emitter(true);
|
|
farm1.cleanup_emitter(false);
|
|
}
|
|
}
|
|
ff_node* collector2 = farm2.getCollector();
|
|
if (farm2.hasCollector()) {
|
|
newfarm.add_collector(collector2);
|
|
if (farm2.isset_cleanup_collector()) {
|
|
newfarm.cleanup_collector(true);
|
|
farm2.cleanup_collector(false);
|
|
}
|
|
}
|
|
|
|
std::vector<ff_node*> Wtmp1(W1.size());
|
|
for(size_t i=0;i<W1.size();++i) {
|
|
if (!node1->isMultiOutput()) {
|
|
auto mo = new internal_mo_transformer(*node1);
|
|
assert(mo);
|
|
auto pc = new ff_comb(W1[i], mo,
|
|
farm1.isset_cleanup_workers() , true);
|
|
assert(pc);
|
|
Wtmp1[i]=pc;
|
|
} else {
|
|
auto c = new C_t(*node1);
|
|
assert(c);
|
|
auto pc = new ff_comb(W1[i], c,
|
|
farm1.isset_cleanup_workers(), true);
|
|
assert(pc);
|
|
Wtmp1[i]=pc;
|
|
}
|
|
}
|
|
if (farm1.isset_cleanup_workers()) farm1.cleanup_workers(false);
|
|
a2a->add_firstset(Wtmp1, farm2.ondemand_buffer(), true); // cleanup set to true
|
|
a2a->add_secondset(W2, farm2.isset_cleanup_workers());
|
|
if (farm2.isset_cleanup_workers()) farm2.cleanup_workers(false);
|
|
|
|
std::vector<ff_node*> W;
|
|
W.push_back(a2a);
|
|
newfarm.add_workers(W);
|
|
newfarm.cleanup_workers(); // a2a will be delated at the end
|
|
if (farm1.ondemand_buffer())
|
|
newfarm.set_scheduling_ondemand(farm1.ondemand_buffer());
|
|
|
|
newpipe.add_stage(newfarm);
|
|
return newpipe;
|
|
}
|
|
assert(node2!=nullptr && node1!=nullptr); // case4.1
|
|
|
|
if (!mergeCE) {
|
|
// TODO: we can relax the following two constraints.
|
|
if (node1->isMultiInput()) {
|
|
error("combine_farms, node1 cannot be a multi-input node\n");
|
|
return newpipe;
|
|
}
|
|
if (node2->isMultiOutput()) {
|
|
error("combine_farms, node2 cannot be a multi-output node\n");
|
|
return newpipe;
|
|
}
|
|
}
|
|
|
|
ff_a2a *a2a = new ff_a2a;
|
|
if (a2a == nullptr) {
|
|
error("combine_farms, FATAL ERROR, not enough memory\n");
|
|
return newpipe;
|
|
}
|
|
|
|
ff_farm newfarm;
|
|
const svector<ff_node *> & w1= farm1.getWorkers();
|
|
const svector<ff_node *> & w2= farm2.getWorkers();
|
|
|
|
std::vector<ff_node*> W1(w1.size());
|
|
std::vector<ff_node*> W2(w2.size());
|
|
for(size_t i=0;i<W1.size();++i) W1[i]=w1[i];
|
|
for(size_t i=0;i<W2.size();++i) W2[i]=w2[i];
|
|
|
|
ff_node* emitter1 = farm1.getEmitter();
|
|
if (emitter1) {
|
|
newfarm.add_emitter(emitter1);
|
|
if (farm1.isset_cleanup_emitter()) {
|
|
newfarm.cleanup_emitter(true);
|
|
farm1.cleanup_emitter(false);
|
|
}
|
|
}
|
|
ff_node* collector2 = farm2.getCollector();
|
|
if (farm2.hasCollector()) {
|
|
newfarm.add_collector(collector2);
|
|
if (farm2.isset_cleanup_collector()) {
|
|
newfarm.cleanup_collector(true);
|
|
farm2.cleanup_collector(false);
|
|
}
|
|
}
|
|
|
|
std::vector<ff_node*> Wtmp1(W1.size());
|
|
for(size_t i=0;i<W1.size();++i) {
|
|
if (!node1->isMultiOutput()) {
|
|
auto mo = new internal_mo_transformer(*node1);
|
|
assert(mo);
|
|
auto pc = new ff_comb(W1[i], mo,
|
|
farm1.isset_cleanup_workers() , true);
|
|
Wtmp1[i]=pc;
|
|
} else {
|
|
auto c = new C_t(*node1);
|
|
assert(c);
|
|
auto pc = new ff_comb(W1[i], c,
|
|
farm1.isset_cleanup_workers(), true);
|
|
Wtmp1[i]=pc;
|
|
}
|
|
}
|
|
if (farm1.isset_cleanup_workers()) farm1.cleanup_workers(false);
|
|
a2a->add_firstset(Wtmp1, farm2.ondemand_buffer(), true); // cleanup set to true
|
|
|
|
std::vector<ff_node*> Wtmp2(W2.size());
|
|
for(size_t i=0;i<W2.size();++i) {
|
|
if (!node2->isMultiInput()) {
|
|
auto mi = new internal_mi_transformer(*node2);
|
|
assert(mi);
|
|
auto pc = new ff_comb(mi,W2[i],
|
|
true, farm2.isset_cleanup_workers());
|
|
assert(pc);
|
|
Wtmp2[i]=pc;
|
|
} else {
|
|
auto e = new E_t(*node2);
|
|
assert(e);
|
|
auto pc = new ff_comb(e, W2[i],
|
|
true, farm2.isset_cleanup_workers());
|
|
assert(pc);
|
|
Wtmp2[i]=pc;
|
|
}
|
|
}
|
|
if (farm2.isset_cleanup_workers()) farm2.cleanup_workers(false);
|
|
a2a->add_secondset(Wtmp2, true); // cleanup set to true
|
|
|
|
std::vector<ff_node*> W;
|
|
W.push_back(a2a);
|
|
newfarm.add_workers(W);
|
|
newfarm.cleanup_workers(); // a2a will be deleted at the end
|
|
if (farm1.ondemand_buffer())
|
|
newfarm.set_scheduling_ondemand(farm1.ondemand_buffer());
|
|
|
|
newpipe.add_stage(newfarm);
|
|
return newpipe;;
|
|
}
|
|
|
|
|
|
|
|
} // namespace ff
|
|
#endif /* FF_COMBINE_HPP */
|