ctrl: reenable the waitpid() code to catch crashing/killed dooms

TODO: implement DoomState::tLastActive timeout logic
This commit is contained in:
oxmox 2023-02-21 23:05:53 +01:00
parent eea3377406
commit 8b5231a782

View file

@ -166,45 +166,52 @@ inline auto dp_elapsed(const std::chrono::steady_clock::time_point &tStart) { re
void check_on_dooms(ControllerContext &ctx) void check_on_dooms(ControllerContext &ctx)
{ {
#if 0
pid_t pid = 0; pid_t pid = 0;
// This works for dooms forked from the controller, not for externally // Detect dooms that have terminated. This works for dooms forked from the
// started dooms. // controller, not for externally started dooms.
do do
{ {
int wstatus = 0; int wstatus = 0;
if (pid = waitpid(0, &wstatus, WNOHANG); pid > 0) if (pid = waitpid(0, &wstatus, WNOHANG); pid > 0)
{ {
auto ds = find_in_container(ctx.dooms, [pid] (const auto &ds) { return ds.id == pid; }); if (auto ds = find_in_container(ctx.dooms, [pid] (const auto &ds) { return ds.id == pid; });
ds != std::end(ctx.dooms))
assert(ds != std::end(ctx.dooms));
if (ds != std::end(ctx.dooms))
{ {
if (WIFEXITED(wstatus)) if (WIFEXITED(wstatus))
log_info("doom(%d) exited with status %d", pid, WEXITSTATUS(wstatus)); log_info("doom(%d) exited with status %d", pid, WEXITSTATUS(wstatus));
else if (WIFSIGNALED(wstatus)) else if (WIFSIGNALED(wstatus))
log_warn("doom#(%d) got killed by signal %d", pid, WTERMSIG(wstatus)); log_warn("doom#(%d) got killed by signal %d", pid, WTERMSIG(wstatus));
SDL_DestroyTexture(ds->texture); // TODO: use the destructor to do this // Manually set Endoom state and let the code below clean it up.
ctx.dooms.erase(ds); ds->state = DP_DS_Endoom;
} }
} }
} while (pid > 0); } while (pid > 0);
#else
const auto sz0 = ctx.dooms.size(); // Find dooms that are in Endoom state and remove them. This works for
auto eb = std::remove_if(std::begin(ctx.dooms), std::end(ctx.dooms), [] (const auto &ds) { return ds.state == DP_DS_Endoom; }); // externally started dooms if we received their Endoom DoomState update.
if (eb != std::end(ctx.dooms))
{ {
auto count = std::distance(eb, std::end(ctx.dooms)); const auto prevCount = ctx.dooms.size();
std::for_each(eb, std::end(ctx.dooms), [] (auto &ds) { SDL_DestroyTexture(ds.texture); ds.texture = nullptr; }); const auto removedBegin = std::remove_if(std::begin(ctx.dooms), std::end(ctx.dooms), [](const auto &ds)
ctx.dooms.erase(eb, std::end(ctx.dooms)); { return ds.state == DP_DS_Endoom; });
const auto sz1 = ctx.dooms.size(); if (removedBegin != std::end(ctx.dooms))
log_info("Erased %zu dooms which were in Endoom state. doomcount before=%zu, after=%zu", count, sz0, sz1); {
auto count = std::distance(removedBegin, std::end(ctx.dooms));
std::for_each(removedBegin, std::end(ctx.dooms), [](auto &ds)
{ SDL_DestroyTexture(ds.texture); ds.texture = nullptr; });
ctx.dooms.erase(removedBegin, std::end(ctx.dooms));
const auto newCount = ctx.dooms.size();
log_info("Erased %zu dooms which were in Endoom state. Doomcount before=%zu, after=%zu", count, prevCount, newCount);
} }
#endif }
// FIXME: We can miss Endoom state updates when nng has to drop messages due
// to queue size limits. If this happens for an externally started doom, it
// will never be removed from ctx.dooms (waitpid() does not work because the
// doom is not our child). Use DoomState::tLastActive and a fixed timeout
// value to time out stale dooms and erase them from ctx.dooms.
} }
void do_networking(ControllerContext &ctx) void do_networking(ControllerContext &ctx)