15 #include <slurm/slurm.h> 23 static uint64_t epoch_start = 0;
25 template <
class T,
class U>
36 return (
stat(name.c_str(), &buffer) == 0);
40 mkdir(name.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);
45 return ((
double)num_bytes) * 8.0 / (eTime - sTime);
58 char *nodeid_ptr = getenv(
"SLURM_NODEID");
59 char *nnodes_ptr = getenv(
"SLURM_NNODES");
60 char *hostnames = getenv(
"SLURM_JOB_NODELIST");
61 if(!nodeid_ptr || !nnodes_ptr || !hostnames)
return false;
63 hostlist_t hostlist = slurm_hostlist_create(hostnames);
64 if(!hostlist)
return false;
68 while((host = slurm_hostlist_shift(hostlist))) {
69 addresses.emplace(i++, host);
72 slurm_hostlist_destroy(hostlist);
78 assert(node_rank < num_nodes);
90 cout <<
"Please enter '[node_id] [num_nodes]': ";
96 cout <<
"Please enter IP Address for node " << i <<
": ";
98 addresses.emplace(i, addr);
103 LOG_EVENT(-1, -1, -1,
"begin_epoch_reset");
105 LOG_EVENT(-1, -1, -1,
"end_epoch_reset");
109 double sum = std::accumulate(v.begin(), v.end(), 0.0);
110 return sum / v.size();
114 double sq_sum = std::inner_product(v.begin(), v.end(), v.begin(), 0.0);
115 return std::sqrt(sq_sum / v.size() - mean *
mean);
121 auto flush_server = []() {
124 this_thread::sleep_for(chrono::seconds(10));
127 thread t(flush_server);
133 auto basename = [](
const char *path) {
134 const char *base = strrchr(path,
'/');
135 return base ? base + 1 : path;
138 static bool print_header =
true;
141 "time, file:line, event_name, group_number, message_number, " 143 print_header =
false;
145 for(
const auto &e : events) {
146 if(e.group_number == (uint32_t)(-1)) {
147 printf(
"%5.6f, %s:%d, %s\n", 1.0e-6 * (e.time - epoch_start),
148 basename(e.file), e.line, e.event_name);
150 }
else if(e.message_number == (
size_t)(-1)) {
151 printf(
"%5.6f, %s:%d, %s, %" PRIu32
"\n",
152 1.0e-6 * (e.time - epoch_start), basename(e.file), e.line,
153 e.event_name, e.group_number);
155 }
else if(e.block_number == (
size_t)(-1)) {
156 printf(
"%5.6f, %s:%d, %s, %" PRIu32
", %zu\n",
157 1.0e-6 * (e.time - epoch_start), basename(e.file), e.line,
158 e.event_name, e.group_number, e.message_number);
161 printf(
"%5.6f, %s:%d, %s, %" PRIu32
", %zu, %zu\n",
162 1.0e-6 * (e.time - epoch_start), basename(e.file), e.line,
163 e.event_name, e.group_number, e.message_number,
bool file_exists(const string &name)
double compute_data_rate(size_t num_bytes, uint64_t sTime, uint64_t eTime)
#define LOG_EVENT(group_number, message_number, block_number, event_name)
void query_peer_addresses(map< uint32_t, string > &addresses, uint32_t &node_rank)
void put_flush(const char *str)
bool slurm_query_addresses(map< uint32_t, string > &addresses, uint32_t &node_rank)
void start_flush_server()
double compute_mean(std::vector< double > v)
void create_directory(const string &name)
double compute_stddev(std::vector< double > v)
uint64_t addr
Buffer address.