mesytec-mnode/external/taskflow-3.8.0/benchmarks/hetero_traversal/graph.hpp

126 lines
2.6 KiB
C++
Raw Normal View History

2025-01-04 01:25:05 +01:00
#include <taskflow/taskflow.hpp>
#include <taskflow/cuda/cudaflow.hpp>
#include <fstream>
struct pair_hash {
template <typename T1, typename T2>
size_t operator ()(const std::pair<T1, T2>& pair) const {
auto h1 = std::hash<T1>()(pair.first);
auto h2 = std::hash<T2>()(pair.second);
return h1^h2;
}
};
struct Graph {
struct Node {
int v, g;
};
struct Edge {
int u, v;
};
int num_nodes;
int num_edges;
int num_gpus;
std::vector<Edge> edges;
std::vector<Node> nodes;
Graph(const std::string& path) :
num_gpus {static_cast<int>(tf::cuda_get_num_devices())} {
std::ifstream ifs(path);
if(!ifs) throw std::runtime_error("failed to open the file");
ifs >> num_nodes >> num_edges;
nodes.resize(num_nodes);
for(int i=0; i<num_nodes; ++i) {
nodes[i].v = i;
ifs >> nodes[i].g;
}
for(int i=0; i<num_edges; ++i) {
Edge e;
ifs >> e.u >> e.v;
edges.push_back(e);
}
}
Graph(int V, int E, int cuda_ratio) :
num_nodes {V},
num_edges {E},
num_gpus {static_cast<int>(tf::cuda_get_num_devices())} {
std::unordered_set<std::pair<int, int>, pair_hash> set;
num_edges = std::min(num_edges, (num_nodes)*(num_nodes-1)/2);
for(int j=0; j<num_nodes; j++) {
Node v;
v.v = j;
v.g = rand()%cuda_ratio == 0 ? rand()%num_gpus : -1;
nodes.push_back(v);
}
for (int j=0; j<num_edges; j++) {
std::pair<int, int> p;
p.first = rand() % num_nodes;
p.second = rand() % num_nodes;
while(set.find(p) != set.end() || p.first >= p.second) {
p.first = rand() % num_nodes;
p.second = rand() % num_nodes;
if(p.first >= p.second) {
std::swap(p.first, p.second);
}
};
set.insert(p);
}
for (auto& pair : set) {
Edge e;
e.u = pair.first;
e.v = pair.second;
edges.push_back(e);
}
set.clear();
}
void dump(std::ostream& os) {
os << num_nodes << ' ' << num_edges << '\n';
for(const auto& v : nodes) {
os << v.g << '\n';
}
for(const auto& e : edges) {
os << e.u << ' ' << e.v << '\n';
}
}
size_t size() const {
return nodes.size() + edges.size();
}
};
// saxpy kernel
template <typename T>
__global__ void add(T* x, T* y, T* z, int n) {
int i = blockIdx.x*blockDim.x + threadIdx.x;
if (i < n) {
z[i] = x[i] + y[i];
}
}
inline int N = 1024;
std::chrono::microseconds measure_time_taskflow(const Graph&, unsigned, unsigned);
std::chrono::microseconds measure_time_tbb(const Graph&, unsigned, unsigned);
std::chrono::microseconds measure_time_omp(const Graph&, unsigned, unsigned);