/* ***************************************************************************
 *
 *  FastFlow is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License version 3 as
 *  published by the Free Software Foundation.
 *  Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
 *  or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 *  License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 ****************************************************************************
 */
/* Author:
 *   Nicolo' Tonci
 */

// Launcher overview: parses a JSON configuration that lists "groups", then
// either starts one child process per group through popen() (TCP protocol,
// going through ssh for groups whose host is not the local machine) or starts
// a single mpirun command driven by a generated rankfile (MPI protocol).

// NOTE(review): the header names of the following include directives are
// missing from this copy of the file (angle-bracket text appears to have been
// stripped). Restore them from the upstream source before compiling.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace n_fs = std::filesystem;

// Fallback for platforms whose headers do not define HOST_NAME_MAX.
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 255
#endif

// Communication protocols. Values start at 1, so a zero-initialised Proto
// (such as the global below) means "not selected yet".
enum Proto {TCP = 1 , MPI};

// Protocol in use; tested with `!usedProtocol` in main() to detect that it
// has not been chosen yet.
Proto usedProtocol;
// When true, toBePrinted() accepts every group.
bool seeAll = false;
// Names of the groups whose output must be shown when seeAll is false.
// NOTE(review): the element type is missing here (stripped template argument);
// it is searched with std::string values and assigned from split() elsewhere.
std::vector viewGroups;
// Local host name, filled by gethostname() in main() and used by G::isRemote().
char hostname[HOST_NAME_MAX];
// Path of the configuration file, forwarded to children as --DFF_Config=.
std::string configFile("");
// Command line of the program to launch for each group.
std::string executable;

// Returns the current gettimeofday() time expressed in microseconds.
static inline unsigned long getusec() {
    struct timeval tv;
    gettimeofday(&tv,NULL);
    return (unsigned long)(tv.tv_sec*1e6+tv.tv_usec);
}

// Tells whether the output of group `gName` must be shown: true when seeAll is
// set or when the name is listed in viewGroups.
bool toBePrinted(std::string gName){
    return (seeAll || (find(viewGroups.begin(), viewGroups.end(), gName) != viewGroups.end()));
}

// Splits `s` on `delim` using getline() and returns the pieces in order.
// NOTE(review): the vector element type is missing in this copy (stripped
// template argument); the pieces pushed are std::string values.
std::vector split (const std::string &s, char delim) {
    std::vector result;
    std::stringstream ss(s);
    std::string item;

    while (getline (ss, item, delim))
        result.push_back (item);

    return result;
}

// One group of the configuration file together with the handle of the child
// process started for it.
struct G {
    // Group name, host it runs on, and optional command prefix.
    std::string name, host, preCmd;
    // Descriptor and stream of the pipe opened by run(); `file` is reset to
    // nullptr by main() once the child has been closed with pclose().
    int fd = 0;
    FILE* file = nullptr;

    // Deserialises the group from a cereal archive: "name" is required,
    // "endpoint" and "preCmd" are optional.
    // NOTE(review): the template parameter list is missing in this copy;
    // `Archive` is presumably the template type parameter.
    template
    void load( Archive & ar ){
        ar(cereal::make_nvp("name", name));

        try {
            std::string endpoint;
            ar(cereal::make_nvp("endpoint", endpoint));
            // Only the part before the first ':' is kept as the host.
            // NOTE(review): endp[0] is accessed without checking that endp is
            // non-empty; an empty "endpoint" string makes split() return an
            // empty vector.
            std::vector endp(split(endpoint, ':'));
            host = endp[0];
            //port = std::stoi(endp[1]);
        } catch (cereal::Exception&) {
            host = "127.0.0.1"; // set the host to localhost if not found in config file!
            // presumably clears the pending name so parsing can continue -
            // confirm against the cereal documentation
            ar.setNextName(nullptr);
        }

        try {
            ar(cereal::make_nvp("preCmd", preCmd));
        } catch (cereal::Exception&) {
            ar.setNextName(nullptr);
        }
    }

    // Builds the shell command for this group, starts it with popen() in read
    // mode and switches the pipe descriptor to non-blocking mode. Remote
    // groups are wrapped in `ssh -T <host> '...'`; output of groups rejected
    // by toBePrinted() is redirected to /dev/null.
    void run(){
        // NOTE(review): sprintf() below writes into this fixed buffer without
        // any bound; long paths or preCmd strings can overflow it.
        char b[1024]; // ssh -t // find MAX ARGV (maximum command-line length)

        sprintf(b, " %s %s %s %s %s --DFF_Config=%s --DFF_GName=%s %s 2>&1 %s", (isRemote() ? "ssh -T " : ""), (isRemote() ? host.c_str() : ""), (isRemote() ? "'" : ""), this->preCmd.c_str(), executable.c_str(), configFile.c_str(), this->name.c_str(), toBePrinted(this->name) ? "" : "> /dev/null", (isRemote() ? "'" : ""));
        std::cout << "Executing the following command: " << b << std::endl;
        file = popen(b, "r");
        // NOTE(review): `file` is passed to fileno() before being checked for
        // nullptr; a failed popen() is not detected by the test on fd below.
        fd = fileno(file);

        if (fd == -1) {
            printf("Failed to run command\n" );
            exit(1);
        }

        // Make reads on the pipe non-blocking so that main() can poll all
        // groups in turn.
        int flags = fcntl(fd, F_GETFL, 0);
        flags |= O_NONBLOCK;
        fcntl(fd, F_SETFL, flags);
    }

    // A group is local when its host is "127.0.0.1", "localhost" or equal to
    // the global `hostname`; any other host is considered remote.
    bool isRemote(){return !(!host.compare("127.0.0.1") || !host.compare("localhost") || !host.compare(hostname));}

};

// Returns true when no group has an open stream left (every `file` is nullptr).
// NOTE(review): the vector element type is missing in this copy; elements are
// iterated as G&.
bool allTerminated(std::vector& groups){
    for (G& g: groups)
        if (g.file != nullptr)
            return false;
    return true;
}

// Prints the command-line help on standard output.
// NOTE(review): the placeholders after "-f" and "-v" look truncated in this
// copy (angle-bracket text appears to have been stripped from the literals).
static inline void usage(char* progname) {
    std::cout << "\nUSAGE: " << progname << " [Options] -f \n"
              << "Options: \n"
              << "\t -v ,..., \t Prints the output of the specified groups\n"
              << "\t -V \t Print the output of all groups\n"
              << "\t -p \"TCP|MPI\" \t Force communication protocol\n";
    std::cout << "\n";
}

// Writes a rankfile named /tmp/dffRankfile<pid> containing one line
// "rank <i>=<host> slot=0" per group, in vector order, and returns its path.
// The caller is responsible for deleting the file.
std::string generateRankFile(std::vector& parsedGroups){
    std::string name = "/tmp/dffRankfile" + std::to_string(getpid());

    std::ofstream tmpFile(name, std::ofstream::out);

    for(size_t i = 0; i < parsedGroups.size(); i++)
        tmpFile << "rank " << i << "=" << parsedGroups[i].host << " slot=0\n";
    /*for (const G& group : parsedGroups)
        tmpFile << group.host << std::endl;*/

    tmpFile.close();
    // return the name of the temporary file just created; remember to remove it after the usage
    return name;
}

// Entry point: prints the help when asked, reads the configuration and starts
// the groups with the selected protocol.
int main(int argc, char** argv) {

    // No argument or an explicit help flag: show the usage text and stop.
    if (argc == 1 ||
        strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-help") == 0 || strcmp(argv[1], "-h") == 0){
        usage(argv[0]);
        exit(EXIT_SUCCESS);
    }

    // get the hostname
    if (gethostname(hostname, HOST_NAME_MAX) != 0) {
        perror("gethostname");
        exit(EXIT_FAILURE);
    }

    int optind=0;
    // NOTE(review): the text between `i` and `parsedGroups` is missing from
    // this copy (presumably the loop condition, the command-line option
    // handling, the opening of the stream `is` used below and the declaration
    // of `parsedGroups`). Restore it from the upstream source; do not
    // reconstruct it by guesswork.
    for(int i=1;i parsedGroups;

    try {
        cereal::JSONInputArchive ar(is);

        // get the protocol to be used from the configuration file if it was not forced by the command line
        if (!usedProtocol)
            try {
                std::string tmpProtocol;
                ar(cereal::make_nvp("protocol", tmpProtocol));
                // Any value other than "MPI" selects TCP.
                if (tmpProtocol == "MPI") usedProtocol = Proto::MPI;
                else usedProtocol = Proto::TCP;
            } catch (cereal::Exception&) {
                ar.setNextName(nullptr);
                // if the protocol is not specified we assume TCP
                usedProtocol = Proto::TCP;
            }

        // parse all the groups in the configuration file
        ar(cereal::make_nvp("groups", parsedGroups));
    } catch (const cereal::Exception& e){
        std::cerr << "Error parsing the JSON config file. Check syntax and structure of the file and retry!" << std::endl;
        exit(EXIT_FAILURE);
    }

#ifdef DEBUG
    for(auto& g : parsedGroups)
        std::cout << "Group: " << g.name << " on host " << g.host << std::endl;
#endif

    if (usedProtocol == Proto::TCP){
        auto Tstart = getusec();

        // Start every group, then poll their pipes until all are closed.
        for (G& g : parsedGroups)
            g.run();

        while(!allTerminated(parsedGroups)){
            for(G& g : parsedGroups){
                if (g.file != nullptr){
                    char buff[1024] = { 0 };

                    ssize_t result = read(g.fd, buff, sizeof(buff));
                    if (result == -1){
                        // EAGAIN: nothing to read right now on the
                        // non-blocking pipe, move on to the next group.
                        if (errno == EAGAIN)
                            continue;

                        // Any other read error: close the child and report a
                        // non-zero exit status.
                        // NOTE(review): WEXITSTATUS is used without checking
                        // WIFEXITED first.
                        int code = pclose(g.file);
                        if (WEXITSTATUS(code) != 0)
                            std::cout << "[" << g.name << "][ERR] Report an return code: " << WEXITSTATUS(code) << std::endl;
                        g.file = nullptr;
                    } else if (result > 0){
                        // NOTE(review): buff is printed as a C string; when
                        // read() fills all sizeof(buff) bytes there is no
                        // terminating NUL left in the buffer.
                        std::cout << buff;
                    } else {
                        // read() returned 0: end of the child's output.
                        int code = pclose(g.file);
                        if (WEXITSTATUS(code) != 0)
                            std::cout << "[" << g.name << "][ERR] Report an return code: " << WEXITSTATUS(code) << std::endl;
                        g.file = nullptr;
                    }
                }
            }

            // Pause between polling rounds.
            std::this_thread::sleep_for(std::chrono::milliseconds(15));
        }

        std::cout << "Elapsed time: " << (getusec()-(Tstart))/1000 << " ms" << std::endl;
    }

    if (usedProtocol == Proto::MPI){
        std::string rankFile = generateRankFile(parsedGroups);
        std::cout << "RankFile: " << rankFile << std::endl;
        // invoke mpirun using the just created rankfile

        // NOTE(review): sprintf() writes into this 350-byte buffer without any
        // bound, and %lu is paired with a size_t argument.
        char command[350];

        sprintf(command, "mpirun -np %lu --rankfile %s %s --DFF_Config=%s", parsedGroups.size(), rankFile.c_str(), executable.c_str(), configFile.c_str());

        std::cout << "mpicommand: " << command << "\n";

        FILE *fp;
        char buff[1024];
        fp = popen(command, "r");
        if (fp == NULL) {
            printf("Failed to run command\n" );
            exit(1);
        }

        /* Read the output a line at a time - output it. */
        while (fgets(buff, sizeof(buff), fp) != NULL) {
            std::cout << buff;
        }

        pclose(fp);

        // The rankfile is only needed while mpirun runs.
        std::remove(rankFile.c_str());
    }

    return 0;
}