259 lines
7.5 KiB
C++
259 lines
7.5 KiB
C++
|
/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||
|
|
||
|
|
||
|
/* ***************************************************************************
|
||
|
*
|
||
|
* FastFlow is free software; you can redistribute it and/or modify it
|
||
|
* under the terms of the GNU Lesser General Public License version 3 as
|
||
|
* published by the Free Software Foundation.
|
||
|
* Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
|
||
|
* or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
|
||
|
*
|
||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||
|
* License for more details.
|
||
|
*
|
||
|
* You should have received a copy of the GNU Lesser General Public License
|
||
|
* along with this program; if not, write to the Free Software Foundation,
|
||
|
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||
|
*
|
||
|
****************************************************************************
|
||
|
*/
|
||
|
|
||
|
/* Author: Massimo Torquati
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
// This file contains some configuration variables. Some of them are
|
||
|
// particularly critical for performance matters, for example:
|
||
|
// FF_MAPPING_STRING, BLOCKING_MODE, TRACE_FASTFLOW, etc.
|
||
|
//
|
||
|
|
||
|
#ifndef FF_CONFIG_HPP
|
||
|
#define FF_CONFIG_HPP
|
||
|
|
||
|
#include <cstddef>
|
||
|
#include <climits>
|
||
|
#if defined(TRACE_FASTFLOW)
|
||
|
#include <iostream>
|
||
|
#endif
|
||
|
|
||
|
|
||
|
/*
|
||
|
* If NO_DEFAULT_MAPPING is not defined (and if FF_MAPPING_STRING is equal
|
||
|
* to ""), the FastFlow library pins each spawned thread to a core context
|
||
|
* according to a simple policy: thread 0 to core 0, thread 1
|
||
|
* to core 1, and so on restarting from the beginning if there are more
|
||
|
* threads than core contexts (which is usually not a good idea if
|
||
|
* performance matters).
|
||
|
* Depending on the OS numbering of core contexts, core 0 and core 1 may be
|
||
|
* "far away" one each other, e.g., they could be on two distinct CPUs,
|
||
|
* thus not sharing any level of cache.
|
||
|
* Therefore, to control the mapping of the thread when the OS numbering
|
||
|
* is a bit mess (e.g. Power8, AMD machines, some Intel Xeon), you have
|
||
|
* two options:
|
||
|
* 1. to use the Mammut library, which provides a layer for discovering
|
||
|
* core contexts
|
||
|
* 2. to set the FF_MAPPING_STRING in this file.
|
||
|
*
|
||
|
* For case 2, the simplest option is to run the Bash script
|
||
|
* 'mapping_string.sh', which returns a suitable string that can be
|
||
|
* copy-paste in the FF_MAPPING_STRING preprocessor variable.
|
||
|
* Note that, if you wish (and trust it) the script can modify the
|
||
|
* config.hpp file for you. The script also sets the FF_NUM_CORES and
|
||
|
* FF_NUM_REAL_CORES variables.
|
||
|
* Example:
|
||
|
* > ./mapping_string.sh
|
||
|
* > FF_MAPPING_STRING="0,2,1,3"
|
||
|
* > FF_NUM_CORES=4
|
||
|
* > FF_NUM_REAL_CORES=2
|
||
|
* > Do you want that I change the ./config.hpp file for you? (y/N) y
|
||
|
* > This is the new FF_MAPPING_STRING variable in the ./config.hpp file:
|
||
|
* > #if !defined MAPPING_STRING
|
||
|
* > #define FF_MAPPING_STRING "0,2,1,3"
|
||
|
* > #else
|
||
|
* > ...
|
||
|
*
|
||
|
*/
|
||
|
/*
|
||
|
* NOTE: if FF_MAPPING_STRING is "" (default), FastFlow executes a linear
|
||
|
* mapping of threads.
|
||
|
*/
|
||
|
#if !defined MAPPING_STRING
|
||
|
#define FF_MAPPING_STRING ""
|
||
|
#else
|
||
|
#define FF_MAPPING_STRING MAPPING_STRING
|
||
|
#endif
|
||
|
/*
|
||
|
* It is the number of the logical cores of the machine.
|
||
|
* NOTE: if FF_NUM_CORES is -1 (default), FastFlow will use ff_numCores()
|
||
|
* (which is a costly function).
|
||
|
*/
|
||
|
#if !defined NUM_CORES
|
||
|
#define FF_NUM_CORES -1
|
||
|
#else
|
||
|
#define FF_NUM_CORES NUM_CORES
|
||
|
#endif
|
||
|
/*
|
||
|
* It is the number of the physical cores of the machine.
|
||
|
* NOTE: if FF_NUM_REAL_CORES is -1 (default), FastFlow will use
|
||
|
* ff_realNumCores() (which is a costly function)
|
||
|
*/
|
||
|
#if !defined NUM_REAL_CORES
|
||
|
#define FF_NUM_REAL_CORES -1
|
||
|
#else
|
||
|
#define FF_NUM_REAL_CORES NUM_REAL_CORES
|
||
|
#endif
|
||
|
|
||
|
|
||
|
#if defined(FF_BOUNDED_BUFFER)
|
||
|
#define FF_FIXED_SIZE true
|
||
|
#else // NOTE: by default the queues are unbounded!!!!
|
||
|
#define FF_FIXED_SIZE false
|
||
|
#endif
|
||
|
|
||
|
// WARNING: Do not change the following with SWSR_Ptr_Buffer unless
|
||
|
// you know what your are doing....
|
||
|
#define FFBUFFER uSWSR_Ptr_Buffer
|
||
|
|
||
|
/*
|
||
|
* This is the default buffer capacity and the default difference between the input
|
||
|
* and output channels capacity.
|
||
|
*
|
||
|
*/
|
||
|
#if !defined(DEFAULT_BUFFER_CAPACITY)
|
||
|
#define DEFAULT_BUFFER_CAPACITY 2048
|
||
|
#endif
|
||
|
|
||
|
|
||
|
/* To save energy and improve hyperthreading performance
|
||
|
* define the following macro
|
||
|
*/
|
||
|
//#define SPIN_USE_PAUSE 1
|
||
|
|
||
|
/* To enable OPENCL support
|
||
|
*
|
||
|
*/
|
||
|
//#define FF_OPENCL 1
|
||
|
|
||
|
|
||
|
/* To enable task callbacks
|
||
|
*
|
||
|
* If enabled, 2 callbacks are called by the run-time:
|
||
|
* - one before receiving the task in input
|
||
|
* - one just after having computed the task (before sending it out)
|
||
|
*/
|
||
|
//#define FF_TASK_CALLBACK 1
|
||
|
|
||
|
/*
|
||
|
****** DISTRIBUTED SUPPORT PARAMETERS
|
||
|
*/
|
||
|
#define MAX_RETRIES 1500
|
||
|
#define AGGRESSIVE_TRESHOLD 1000
|
||
|
|
||
|
#define MAXBACKLOG 32
|
||
|
|
||
|
/*
|
||
|
****** END DISTRIBUTED VERSION PARAMETERS
|
||
|
*/
|
||
|
|
||
|
|
||
|
#if defined(TRACE_FASTFLOW)
|
||
|
#define FFTRACE(x) x
|
||
|
#else
|
||
|
#define FFTRACE(x)
|
||
|
#endif
|
||
|
|
||
|
#if defined(BLOCKING_MODE)
|
||
|
#define FF_RUNTIME_MODE true
|
||
|
#else
|
||
|
#define FF_RUNTIME_MODE false // by default the run-time is in nonblocking mode
|
||
|
#endif
|
||
|
|
||
|
/* Used in blocking mode to limit the amount of time
|
||
|
* before checking again the input/output queue.
|
||
|
* NOTE: it cannot be greater than 1e+9 (i.e. 1sec)
|
||
|
*/
|
||
|
#define FF_TIMEDWAIT_NS 200000
|
||
|
|
||
|
/*
|
||
|
* Used in the ordered farm pattern (ff_OFarm).
|
||
|
* It is the maximum amount of data elements buffered in the farm's collector
|
||
|
* to preserve output ordering. In some case such value has to be increased
|
||
|
* (see set_scheduling_ondemand in ff_ofarm.hpp)
|
||
|
*/
|
||
|
#define DEF_OFARM_ONDEMAND_MEMORY 10000
|
||
|
|
||
|
|
||
|
// If the following is defined, then an initial barrier is executed among all threads
|
||
|
// to ensure that all threads are started. It can be commented out if that condition
|
||
|
// is not needed. Usually it is useful for debugging purposes.
|
||
|
// #define FF_INITIAL_BARRIER
|
||
|
|
||
|
// Which barrier implementation to use
|
||
|
#if !defined(BARRIER_T)
|
||
|
#define BARRIER_T spinBarrier
|
||
|
#endif
|
||
|
|
||
|
// maximum number of threads that can be spawned
|
||
|
#if !defined(MAX_NUM_THREADS)
|
||
|
#define MAX_NUM_THREADS 512
|
||
|
#endif
|
||
|
|
||
|
// maximum number of workers in a farm
|
||
|
#define DEF_MAX_NUM_WORKERS (MAX_NUM_THREADS-2)
|
||
|
|
||
|
// NOTE: BACKOFF_MIN/MAX are lower and upper bound backoff values.
|
||
|
// Notice that backoff bounds are highly dependent on the system and
|
||
|
// from the concurrency levels. This values should be carefully tuned
|
||
|
// in order to achieve the maximum performance.
|
||
|
#if !defined(BACKOFF_MIN)
|
||
|
#define BACKOFF_MIN 128
|
||
|
#endif
|
||
|
#if !defined(BACKOFF_MAX)
|
||
|
#define BACKOFF_MAX 1024
|
||
|
#endif
|
||
|
|
||
|
#if !defined(CACHE_LINE_SIZE)
|
||
|
#define CACHE_LINE_SIZE 64
|
||
|
#endif
|
||
|
|
||
|
|
||
|
// TODO:
|
||
|
//#if defined(NO_CMAKE_CONFIG)
|
||
|
|
||
|
// TODO change to __GNUC__ that is portable. GNUC specific code currently works
|
||
|
// on linux only
|
||
|
#if defined(__USE_GNU) //linux
|
||
|
//#if defined(__GNUC__)
|
||
|
#define HAVE_PTHREAD_SETAFFINITY_NP 1
|
||
|
//#warning "Is GNU compiler"
|
||
|
#endif
|
||
|
|
||
|
#if defined(__APPLE__)
|
||
|
#include <AvailabilityMacros.h>
|
||
|
#if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && (__MAC_OS_X_VERSION_MIN_REQUIRED >= 1050)
|
||
|
#define MAC_OS_X_HAS_AFFINITY 1
|
||
|
#else
|
||
|
#define MAC_OS_X_HAS_AFFINITY 0
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
//#else
|
||
|
// the config.h file will be generated by cmake
|
||
|
//#include <ff/config.h>
|
||
|
//#endif // NO_CMAKE_CONFIG
|
||
|
|
||
|
#if defined(USE_CMAKE_CONFIG) && !defined(NOT_USE_CMAKE_CONFIG)
|
||
|
#include <cmake.modules/ffconfig.h>
|
||
|
#endif
|
||
|
|
||
|
// OpenCL additional code needed to compile kernels
|
||
|
#define FF_OPENCL_DATATYPES_FILE "ff_opencl_datatypes.cl"
|
||
|
|
||
|
// Convenience macros.
|
||
|
#define FF_IGNORE_UNUSED(x) static_cast<void>(x)
|
||
|
|
||
|
#endif /* FF_CONFIG_HPP */
|