mesytec-mnode/external/taskflow-3.8.0/tfprof/server/tfprof.cpp
2025-01-04 01:25:05 +01:00

644 lines
16 KiB
C++

// 3rd-party include
#include <httplib/httplib.hpp>
#include <CLI11/CLI11.hpp>
#include <nlohmann/json.hpp>
#include <spdlog/spdlog.h>
#include <taskflow/taskflow.hpp>
#include <cmath>
namespace tf {

// Database loads a serialized tf::ProfileData file and answers the JSON
// queries issued by the tfprof web UI:
//   - "cluster" view: adjacent tasks are greedily merged until at most
//     `limit` segments remain;
//   - "criticality" view: only the `limit` longest tasks in the zoomed
//     range are reported.
// All timestamps in the emitted JSON are durations relative to the
// earliest observed time point (_minX), cast to the caller-chosen
// duration type D (microseconds in this server).
class Database {

public:

// Rendering mode requested by the client (the "view" field of /queryData).
enum ViewType {
CLUSTER = 0,
CRITICALITY
};

// Flattened timeline of one worker lane: executor id, worker id, lane id,
// a display name of the form "E<e>.W<w>.L<l>", and the lane's task
// segments. The binary searches below assume `tasks` is sorted in
// increasing time order (which is how the profiler records them).
struct WorkerData {

size_t eid;
size_t wid;
size_t lid;

std::string name;

std::vector<Segment> tasks;

WorkerData(
size_t e, size_t w, size_t l, std::string n, std::vector<Segment> t
) :
eid{e}, wid{w}, lid{l}, name {std::move(n)}, tasks{std::move(t)} {
}

// Move-only: `tasks` may be large, so copying is disabled on purpose.
WorkerData(const WorkerData&) = delete;
WorkerData(WorkerData&&) = default;
WorkerData& operator = (const WorkerData&) = delete;
WorkerData& operator = (WorkerData&&) = default;

// Returns the smallest index whose segment ends at or after `value`,
// i.e. the first task still (at least partially) visible when the zoom
// window starts at `value`; std::nullopt if every task ends before it.
std::optional<size_t> lower_bound(observer_stamp_t value) const {
size_t slen = tasks.size();
size_t beg, end, mid;
std::optional<size_t> l;
// l = minArg {span[1] >= zoomX[0]}
beg = 0, end = slen;
while(beg < end) {
mid = (beg + end) >> 1;
if(tasks[mid].end >= value) {
end = mid;
// keep the smallest qualifying index seen so far
l = (l == std::nullopt) ? mid : std::min(mid, *l);
}
else {
beg = mid + 1;
}
};  // NOTE(review): stray ';' after the while loop — harmless but spurious
return l;
}

// Returns the largest index whose segment begins at or before `value`,
// i.e. the last task still (at least partially) visible when the zoom
// window ends at `value`; std::nullopt if every task begins after it.
std::optional<size_t> upper_bound(observer_stamp_t value) const {
size_t slen = tasks.size();
size_t beg, end, mid;
std::optional<size_t> r;
// r = maxArg {span[0] <= zoomX[1]}
beg = 0, end = slen;
while(beg < end) {
mid = (beg + end) >> 1;
if(tasks[mid].beg <= value) {
beg = mid + 1;
// keep the largest qualifying index seen so far
r = (r == std::nullopt) ? mid : std::max(mid, *r);
}
else {
end = mid;
}
}
return r;
}
};

// One candidate "critical" task: `i` is the worker's slot in the queried
// worker set, `key` an iterator to the task segment itself.
struct Criticality {
size_t i;
std::vector<Segment>::const_iterator key;
Criticality(size_t in_i, std::vector<Segment>::const_iterator in_key) :
i{in_i}, key{in_key} {
}
};

// Orders the heap so that the task with the SHORTEST span is on top;
// popping while over `limit` therefore retains the `limit` longest tasks.
struct CriticalityComparator {
bool operator () (const Criticality& a, const Criticality& b) const {
return a.key->span() > b.key->span();
}
};

// priority_queue subclass exposing the protected container `c` so the
// surviving tasks can be re-sorted by (worker, start time) for output.
struct CriticalityHeap : public std::priority_queue<
Criticality, std::vector<Criticality>, CriticalityComparator
> {
// Sort in-place by worker slot, then by task begin time.
// NOTE: this breaks the heap invariant; only call once pruning is done.
void sort() {
std::sort(c.begin(), c.end(), [] (const auto& a, const auto& b) {
if(a.i == b.i) {
return a.key->beg < b.key->beg;
}
return a.i < b.i;
});
}
const std::vector<Criticality>& get() const {
return c;
}
};

// A maximal run of merged tasks [f, t] (indices into a worker's `tasks`)
// for worker slot `i`. The key `k` is the span that would result from
// merging this cluster with the NEXT task; boundary clusters (last task
// in range) carry duration::max() so they are never merged away.
struct Cluster {
size_t i;
size_t f; // from task
size_t t; // to task (inclusive)
observer_stamp_t::duration k; // key
Cluster(size_t in_i, size_t in_f, size_t in_t, observer_stamp_t::duration in_k) :
i{in_i}, f{in_f}, t{in_t}, k{in_k} {
}
using iterator_t = std::list<Cluster>::iterator;
};

// Min-heap on the merge key: the cheapest merge is always on top.
struct ClusterComparator {
bool operator () (Cluster::iterator_t a, Cluster::iterator_t b) const {
return a->k > b->k;
}
};

// The heap stores std::list iterators so entries stay valid while other
// clusters are erased from the per-worker lists.
using ClusterHeap = std::priority_queue<
Cluster::iterator_t, std::vector<Cluster::iterator_t>, ClusterComparator
>;

public:

// Deserializes the profile file at `fpath`, flattens every
// executor/worker/lane timeline into _wd, and records the global time
// range [_minX, _maxX] plus task/executor/worker counts.
// Throws (via TF_THROW) if the file cannot be opened.
Database(const std::string& fpath) {

std::ifstream ifs(fpath);

if(!ifs) {
TF_THROW("failed to open profile data ", fpath);
}

ProfileData pd;
tf::Deserializer<std::ifstream> deserializer(ifs);
deserializer(pd);

// find the minimum starting point
for(auto& timeline : pd.timelines) {
if(timeline.origin < _minX) {
_minX = timeline.origin;
}
}

// convert timelines to flat per-worker data
_num_executors = pd.timelines.size();

for(size_t e=0; e<pd.timelines.size(); e++) {
_num_workers += pd.timelines[e].segments.size();
for(size_t w=0; w<pd.timelines[e].segments.size(); w++) {
for(size_t l=0; l<pd.timelines[e].segments[w].size(); l++) {

// a new worker data (segments are moved out of pd)
WorkerData wd(
e, w, l, stringify("E", e, ".W", w, ".L", l),
std::move(pd.timelines[e].segments[w][l])
);

// widen the global range using the first/last task of this lane
if(!wd.tasks.empty()) {
if(wd.tasks.front().beg < _minX) _minX = wd.tasks.front().beg;
if(wd.tasks.back().end > _maxX) _maxX = wd.tasks.back().end;
}

_num_tasks += wd.tasks.size();

// name -> index, so decode_zoomy can resolve worker names
_wdmap[wd.name] = _wd.size();
_wd.push_back(std::move(wd));
}
}
}
}

// Writes the criticality view as JSON to `os`: for each selected worker,
// the tasks inside the zoom window that rank among the `limit` longest
// tasks overall, plus a per-task-type load breakdown and total time.
// D is the duration unit used for all emitted numbers.
template <typename D>
void query_criticality(
std::ostream& os,
const std::optional<D>& xbeg, const std::optional<D>& xend,
const std::optional<std::vector<std::string>>& workers,
size_t limit
) const {

auto x = decode_zoomx(xbeg, xend);
auto w = decode_zoomy(workers);

CriticalityHeap heap;

// bsearch the range of segments for each worker data
// TODO: parallel_for?
for(size_t i=0; i<w.size(); i++) {

// r = maxArg {span[0] <= zoomX[1]}
auto r = _wd[w[i]].upper_bound(x.second);

if(r == std::nullopt) {
continue;
}

// l = minArg {span[1] >= zoomX[0]}
auto l = _wd[w[i]].lower_bound(x.first);

if(l == std::nullopt || *l > *r) {
continue;
}

// range ok: push every visible task, evicting the shortest whenever
// the heap exceeds `limit` (min-heap keeps the longest `limit` tasks)
for(size_t s=*l; s<=*r; s++) {
heap.emplace(i, _wd[w[i]].tasks.begin() + s);
while(heap.size() > limit) {
heap.pop();
}
}
}

// re-sort survivors by (worker, begin) so output is grouped per worker
heap.sort();

auto& crits = heap.get();
size_t cursor = 0;

// Output the segments
bool first_worker = true;

os << "[";
for(size_t i=0; i<w.size(); i++) {

// sanity check: `crits` is sorted by worker slot, so the cursor can
// never point at a worker earlier than the one being emitted
if(cursor < crits.size() && crits[cursor].i < i) {
TF_THROW("impossible ...");
}

if(!first_worker) {
os << ",";
}
else {
first_worker = false;
}

os << "{\"executor\":\"" << _wd[w[i]].eid << "\","
<< "\"worker\":\"" << _wd[w[i]].name << "\","
<< "\"segs\": [";

// T = total emitted time, loads = per-task-type time, n = task count
size_t T=0, loads[TASK_TYPES.size()] = {0}, n=0;

bool first_crit = true;
for(; cursor < crits.size() && crits[cursor].i == i; cursor++) {

n++;

if(!first_crit) {
os << ",";
}
else {
first_crit = false;
}

// single task
os << "{";

const auto& task = *crits[cursor].key;

// span is emitted relative to _minX in units of D
os << "\"name\":\"" << task.name << "\","
<< "\"type\":\"" << to_string(task.type) << "\","
<< "\"span\": [" << std::chrono::duration_cast<D>(task.beg-_minX).count()
<< ","
<< std::chrono::duration_cast<D>(task.end-_minX).count()
<< "]";

os << "}";

// calculate load
size_t t = std::chrono::duration_cast<D>(task.span()).count();
T += t;
loads[static_cast<int>(task.type)] += t;
}
os << "],\"tasks\":\"" << n << "\",";

// load: stacked [begin,end] spans plus percentage per task type
os << "\"load\":[";
// NOTE(review): this `x` shadows the zoom-range pair `x` declared at the
// top of the function; it is only used as a running stack offset here
size_t x = 0;
for(size_t k=0; k<TASK_TYPES.size(); k++) {
auto type = static_cast<int>(TASK_TYPES[k]);
if(k) os << ",";
os << "{\"type\":\"" << to_string(TASK_TYPES[k]) << "\","
<< "\"span\":[" << x << "," << x+loads[type] << "],"
<< "\"ratio\":" << (T>0 ? loads[type]*100.0f/T : 0) << "}";
x+=loads[type];
}
os << "],";

// totalTime
os << "\"totalTime\":" << T;

os << "}";
}
os << "]";
}

// Writes the cluster view as JSON to `os`: every visible task starts as
// its own cluster; clusters are then greedily merged with their successor
// (cheapest merge first, by key k) while the heap holds more than `limit`
// entries. D is the duration unit used for all emitted numbers.
// NOTE(review): `heap` is shared across the worker loop, so `limit`
// effectively bounds the number of clusters across ALL queried workers,
// not per worker — presumably intentional; confirm against the UI.
template <typename D>
void query_cluster(
std::ostream& os,
const std::optional<D>& xbeg, const std::optional<D>& xend,
const std::optional<std::vector<std::string>>& workers,
size_t limit
) const {

auto x = decode_zoomx(xbeg, xend);
auto w = decode_zoomy(workers);

// one cluster list per selected worker; std::list keeps iterators stable
// under erase, which the heap relies on
std::vector<std::list<Cluster>> clusters{w.size()};
ClusterHeap heap;

// bsearch the range of segments for each worker data
// TODO: parallel_for?
for(size_t i=0; i<w.size(); i++) {

// r = maxArg {span[0] <= zoomX[1]}
auto r = _wd[w[i]].upper_bound(x.second);

if(r == std::nullopt) {
continue;
}

// l = minArg {span[1] >= zoomX[0]}
auto l = _wd[w[i]].lower_bound(x.first);

if(l == std::nullopt || *l > *r) {
continue;
}

// range ok: one singleton cluster per task; the key is the span that
// merging with the NEXT task would cover (next task's end - my begin)
for(size_t s=*l; s<=*r; s++) {
if(s != *r) {
clusters[i].emplace_back(
i,
s,
s,
_wd[w[i]].tasks[s+1].end - _wd[w[i]].tasks[s].beg
);
}
else { // boundary: no successor, so give it an unmergeable key
clusters[i].emplace_back(
i, s, s, observer_stamp_t::duration::max()
);
}
heap.push(std::prev(clusters[i].end()));
}

// while loop must sit after clustering is done
// because we have std::next(top)-> = top->f
while(heap.size() > limit) {
auto top = heap.top();
// if all clusters are in boundary - no need to cluster anymore
if(top->k == observer_stamp_t::duration::max()) {
break;
}
// remove the top element and cluster it with the next
heap.pop();
// merge top with top->next: the successor absorbs top's range; the
// successor always exists because boundary clusters are never popped
std::next(top)->f = top->f;
clusters[top->i].erase(top);
}
}

// Output the segments
bool first_worker = true;

os << "[";
for(size_t i=0; i<w.size(); i++) {

if(!first_worker) {
os << ",";
}
else {
first_worker = false;
}

os << "{\"executor\":\"" << _wd[w[i]].eid << "\","
<< "\"worker\":\"" << _wd[w[i]].name << "\","
<< "\"tasks\":\"" << clusters[i].size() << "\","
<< "\"segs\": [";

// T = total emitted time, loads = per-task-type time
size_t T=0, loads[TASK_TYPES.size()] = {0};

bool first_cluster = true;
for(const auto& cluster : clusters[i]) {

if(!first_cluster) {
os << ",";
}
else {
first_cluster = false;
}

// single task
os << "{";
if(cluster.f == cluster.t) {
// singleton cluster: emit the task itself
const auto& task = _wd[w[i]].tasks[cluster.f];
os << "\"name\":\"" << task.name << "\","
<< "\"type\":\"" << to_string(task.type) << "\","
<< "\"span\": [" << std::chrono::duration_cast<D>(task.beg-_minX).count()
<< ","
<< std::chrono::duration_cast<D>(task.end-_minX).count()
<< "]";
}
else {
// merged cluster: synthetic name and the covering span
const auto& ftask = _wd[w[i]].tasks[cluster.f];
const auto& ttask = _wd[w[i]].tasks[cluster.t];
os << "\"name\":\"(" << (cluster.t-cluster.f+1) << " tasks)\","
<< "\"type\":\"clustered\","
<< "\"span\": [" << std::chrono::duration_cast<D>(ftask.beg-_minX).count()
<< ","
<< std::chrono::duration_cast<D>(ttask.end-_minX).count()
<< "]";
}
os << "}";

// calculate load
// TODO optimization with DP
for(size_t j=cluster.f; j<=cluster.t; j++) {
size_t t = std::chrono::duration_cast<D>(_wd[w[i]].tasks[j].span()).count();
T += t;
loads[static_cast<int>(_wd[w[i]].tasks[j].type)] += t;
}
}
os << "],"; // end segs

// load: stacked [begin,end] spans plus percentage per task type
os << "\"load\":[";
size_t x = 0;
for(size_t k=0; k<TASK_TYPES.size(); k++) {
auto type = static_cast<int>(TASK_TYPES[k]);
if(k) os << ",";
os << "{\"type\":\"" << to_string(TASK_TYPES[k]) << "\","
<< "\"span\":[" << x << "," << x+loads[type] << "],"
<< "\"ratio\":" << (T>0 ? loads[type]*100.0f/T : 0) << "}";
x+=loads[type];
}
os << "],";

// totalTime
os << "\"totalTime\":" << T;

os << "}";
}
os << "]";
}

// Earliest observed time point across all timelines.
observer_stamp_t minX() const {
return _minX;
}

// Latest observed time point across all timelines.
observer_stamp_t maxX() const {
return _maxX;
}

size_t num_tasks() const {
return _num_tasks;
}

size_t num_executors() const {
return _num_executors;
}

size_t num_workers() const {
return _num_workers;
}

private:

std::vector<WorkerData> _wd;

// {std::numeric_limits<size_t>::max()};
// {std::numeric_limits<size_t>::lowest()};
// global time range; initialized to the extremes so the first comparison
// in the constructor always narrows them
observer_stamp_t _minX {observer_stamp_t::max()};
observer_stamp_t _maxX {observer_stamp_t::min()};

size_t _num_tasks {0};
size_t _num_executors {0};
size_t _num_workers {0};

// worker display name -> index into _wd
std::unordered_map<std::string, size_t> _wdmap;

// Converts the optional zoom window (durations relative to _minX) to a
// pair of absolute time stamps; missing bounds default to the full range.
template <typename D>
std::pair<observer_stamp_t, observer_stamp_t>
decode_zoomx(std::optional<D> beg, std::optional<D> end) const {
observer_stamp_t b = beg ? *beg + _minX : _minX;
observer_stamp_t e = end ? *end + _minX : _maxX;
return {b, e};
}

// Resolves the optional list of worker names to indices into _wd;
// a missing list selects every worker. Throws on an unknown name.
std::vector<size_t> decode_zoomy(std::optional<std::vector<std::string>> zoomy) const {
if(zoomy) {
std::vector<size_t> w(zoomy->size());
for(size_t i=0; i<zoomy->size(); i++) {
auto itr = _wdmap.find((*zoomy)[i]);
if(itr == _wdmap.end()) {
TF_THROW("failed to find worker ", (*zoomy)[i]);
}
w[i] = itr->second;
}
return w;
}
else {
std::vector<size_t> w(_wd.size());
for(size_t i=0; i<_wd.size(); i++) {
w[i] = i;
}
return w;
}
}

};

} // namespace tf ------------------------------------------------------------
// tfprof server entry point.
//
// Parses command-line options (-p port, -i input profile, -m mount dir),
// loads the serialized profile into a tf::Database, and serves two PUT
// endpoints over HTTP:
//   /queryInfo - summary of the loaded profile (file, task/worker counts)
//   /queryData - the cluster or criticality view for a zoom window
//
// Fix vs. previous revision: each handler now materializes the response
// string exactly once (the old code called oss.str() up to three times per
// request) and passes it to set_content as std::string, avoiding the
// strlen() of the const char* overload and any truncation at an embedded
// NUL byte.
int main(int argc, char* argv[]) {

  // parse arguments
  CLI::App app{"tfprof"};

  int port{8080};
  app.add_option("-p,--port", port, "port to listen (default=8080)");

  std::string input;
  app.add_option("-i,--input", input, "input profiling file")
     ->required();

  std::string mount;
  app.add_option("-m,--mount", mount, "mount path to index.html")
     ->required();

  CLI11_PARSE(app, argc, argv);

  // change log pattern
  spdlog::set_pattern("[%^%L %D %H:%M:%S.%e%$] %v");
  spdlog::set_level(spdlog::level::debug); // Set global log level to debug

  spdlog::info("reading database {} ...", input);

  // create a database (throws if the profile file cannot be read)
  tf::Database db(input);

  spdlog::info(
    "read {} (#tasks={:d}, #executors={:d}, #workers={:d})",
    input, db.num_tasks(), db.num_executors(), db.num_workers()
  );

  // create a http server and serve the static UI from the mount directory
  httplib::Server server;

  if(server.set_mount_point("/", mount.c_str())) {
    spdlog::info("mounted '/' to {}", mount);
  }
  else {
    // keep serving the query endpoints even if the UI cannot be mounted
    spdlog::critical("failed to mount '/' to {}", mount);
  }

  // Put method: queryInfo - static summary of the loaded profile
  server.Put("/queryInfo",
    [&db, &input](const httplib::Request& req, httplib::Response& res){
      spdlog::info(
        "/queryInfo: connected a new client {0}:{1:d}",
        req.remote_addr, req.remote_port
      );
      std::ostringstream oss;
      oss << "{\"tfpFile\":\"" << input << "\""
          << ",\"numTasks\":" << db.num_tasks()
          << ",\"numExecutors\":" << db.num_executors()
          << ",\"numWorkers\":" << db.num_workers() << '}';
      // build the payload once; std::string overload avoids strlen/NUL issues
      const std::string payload = oss.str();
      res.set_content(payload, "application/json");
      spdlog::info("/queryInfo: sent {0:d} bytes", payload.size());
    }
  );

  // Put method: queryData - cluster/criticality view for a zoom window
  server.Put("/queryData",
    [&db](const httplib::Request& req, httplib::Response& res){

      // request body: {"zoomX":[beg,end]|null, "zoomY":[names]|null,
      //                "view":"Cluster"|"Criticality", "limit":N}
      auto body = nlohmann::json::parse(req.body);

      const auto& jx = body["zoomX"];
      const auto& jy = body["zoomY"];
      const auto& jv = body["view"];
      size_t jl = body["limit"];

      spdlog::info(
        "/queryData: zoomX={}, zoomY=[...{} workers], view={}, limit={}",
        jx.dump(), jy.size(), jv.dump(), jl
      );

      std::optional<std::chrono::microseconds> xbeg, xend;
      std::optional<std::vector<std::string>> y;
      tf::Database::ViewType view_type = tf::Database::CLUSTER;

      // zoomX arrives as fractional microseconds; round to integral us
      if(jx.is_array() && jx.size() == 2) {
        xbeg = std::chrono::microseconds(std::llround((double)jx[0]));
        xend = std::chrono::microseconds(std::llround((double)jx[1]));
      }

      if(jy.is_array()) {
        y.emplace();
        for(auto& w : jy) {
          y->push_back(std::move(w));
        }
      }

      if(jv == "Criticality") {
        view_type = tf::Database::CRITICALITY;
      }

      std::ostringstream oss;

      switch(view_type) {
        case tf::Database::CRITICALITY:
          db.query_criticality<std::chrono::microseconds>(oss, xbeg, xend, y, jl);
        break;

        case tf::Database::CLUSTER:
          db.query_cluster<std::chrono::microseconds>(oss, xbeg, xend, y, jl);
        break;
      }

      // build the payload once; std::string overload avoids strlen/NUL issues
      const std::string payload = oss.str();
      res.set_content(payload, "application/json");
      spdlog::info("/queryData: sent {0:d} bytes", payload.size());
    }
  );

  spdlog::info("listening to http://localhost:{:d} ...", port);

  // blocks until the server is stopped
  server.listen("0.0.0.0", port);

  spdlog::info("shut down server");

  return 0;
}