Derecho  0.9
Distributed systems toolkit for RDMA
schedule.cpp
Go to the documentation of this file.
2 
3 #include <cassert>
4 #include <climits>
5 
6 using std::min;
7 using std::optional;
8 
9 #ifndef NDEBUG
10 #define assert_always(x...) assert(x)
11 #else
12 #define assert_always(x...) if(!x){abort();}
13 #endif
14 
15 vector<uint32_t> chain_schedule::get_connections() const {
16  // establish connection with member_index-1 and member_index+1, if they
17  // exist
18  if(member_index == 0) {
19  if(num_members == 1) {
20  return {};
21  }
22  // with only node with rank 1
23  return {1};
24  } else if(member_index == num_members - 1) {
25  return {num_members - 2};
26  } else {
27  return {member_index - 1, member_index + 1};
28  }
29 }
30 size_t chain_schedule::get_total_steps(size_t num_blocks) const {
31  size_t total_steps = num_blocks + num_members - 2;
32  return total_steps;
33 }
34 optional<schedule::block_transfer> chain_schedule::get_outgoing_transfer(size_t num_blocks, size_t step) const {
35  size_t block_number = step - member_index;
36 
37  if(member_index > step || block_number >= num_blocks || member_index == num_members - 1) {
38  return std::nullopt;
39  }
40 
41  return block_transfer{(uint32_t)(member_index + 1), block_number};
42 }
43 optional<schedule::block_transfer> chain_schedule::get_incoming_transfer(size_t num_blocks, size_t step) const {
44  size_t block_number = (step + 1) - member_index;
45  if(member_index > step + 1 || block_number >= num_blocks || member_index == 0) {
46  return std::nullopt;
47  }
48  return block_transfer{(uint32_t)(member_index - 1), block_number};
49 }
50 optional<schedule::block_transfer> chain_schedule::get_first_block(size_t num_blocks) const {
51  if(member_index == 0) return std::nullopt;
52  return block_transfer{(uint32_t)(member_index - 1), 0};
53 }
54 
55 vector<uint32_t> sequential_schedule::get_connections() const {
56  // if sender, connect to every receiver
57  if(member_index == 0) {
58  vector<uint32_t> ret;
59  for(auto i = 1u; i < num_members; ++i) {
60  ret.push_back(i);
61  }
62  return ret;
63  }
64  // if receiver, connect only to sender
65  else {
66  return {0};
67  }
68 }
69 size_t sequential_schedule::get_total_steps(size_t num_blocks) const {
70  return num_blocks * (num_members - 1);
71 }
72 optional<schedule::block_transfer> sequential_schedule::get_outgoing_transfer(size_t num_blocks, size_t step) const {
73  if(member_index > 0 || step >= num_blocks * (num_members - 1)) {
74  return std::nullopt;
75  }
76 
77  size_t block_number = step % num_blocks;
78  return block_transfer{(uint32_t)(1 + step / num_blocks), block_number};
79 }
80 optional<schedule::block_transfer> sequential_schedule::get_incoming_transfer(size_t num_blocks, size_t step) const {
81  if(1 + step / num_blocks != member_index) {
82  return std::nullopt;
83  }
84 
85  size_t block_number = step % num_blocks;
86  return block_transfer{(uint32_t)0, block_number};
87 }
88 optional<schedule::block_transfer> sequential_schedule::get_first_block(size_t num_blocks) const {
89  if(member_index == 0) return std::nullopt;
90  return block_transfer{0, 0};
91 }
92 
93 vector<uint32_t> tree_schedule::get_connections() const {
94  vector<uint32_t> ret;
95  for(uint32_t i = 0; i < 32; i++) {
96  if(member_index != 0 && (2ull << i) > member_index) {
97  ret.push_back(member_index - (1 << i));
98  break;
99  }
100  }
101 
102  for(uint32_t i = 0; i < 32 && num_members - member_index > (1u << i); i++) {
103  if((1u << i) > member_index) {
104  ret.push_back(member_index + (1 << i));
105  }
106  }
107  return ret;
108 }
109 size_t tree_schedule::get_total_steps(size_t num_blocks) const {
110  unsigned int log2_num_members = ceil(log2(num_members));
111  return num_blocks * log2_num_members;
112 }
113 optional<schedule::block_transfer> tree_schedule::get_outgoing_transfer(size_t num_blocks, size_t step) const {
114  size_t stage = step / num_blocks;
115  if(step >= get_total_steps(num_blocks) || (1u << stage) <= member_index || (1u << stage) >= num_members - member_index) {
116  return std::nullopt;
117  } else {
118  return block_transfer{member_index + (1u << stage),
119  step - stage * num_blocks};
120  }
121 }
122 optional<schedule::block_transfer> tree_schedule::get_incoming_transfer(size_t num_blocks, size_t step) const {
123  size_t stage = step / num_blocks;
124  if(step < get_total_steps(num_blocks) && (1u << stage) <= member_index && member_index < (2u << stage)) {
125  return block_transfer{member_index - (1u << stage),
126  step - stage * num_blocks};
127  } else {
128  return std::nullopt;
129  }
130 }
131 optional<schedule::block_transfer> tree_schedule::get_first_block(size_t num_blocks) const {
132  if(member_index == 0) return std::nullopt;
133 
134  for(uint32_t i = 0; i < 32; i++) {
135  if((2ull << i) > member_index)
136  return block_transfer{member_index - (1 << i), 0};
137  }
138  assert_always(false);
139 }
140 vector<uint32_t> binomial_schedule::get_connections() const {
141  vector<uint32_t> ret;
142 
143  uint32_t twin = (member_index < (1u << log2_num_members))
144  ? member_index + (1 << log2_num_members) - 1
145  : member_index + 1 - (1 << log2_num_members);
146 
147  uint32_t vertex = min(member_index, twin);
148 
149  if(member_index > 0 && twin < num_members) ret.push_back(twin);
150 
151  for(size_t i = 0; i < log2_num_members; ++i) {
152  // connect to num_members^ (1 << i)
153  uint32_t neighbor = vertex ^ (1 << i);
154  uint32_t neighbor_twin = neighbor + (1 << log2_num_members) - 1;
155 
156  ret.push_back(neighbor);
157  if(neighbor > 0 && neighbor_twin < num_members) {
158  ret.push_back(neighbor_twin);
159  }
160  }
161  return ret;
162 }
163 
164 size_t binomial_schedule::get_total_steps(size_t num_blocks) const {
165  if(1u << log2_num_members == num_members)
166  return num_blocks + log2_num_members - 1;
167 
168  return num_blocks + log2_num_members;
169 }
170 optional<schedule::block_transfer> binomial_schedule::get_vertex_outgoing_transfer(
171  uint32_t sender, size_t send_step, uint32_t num_members,
172  unsigned int log2_num_members, size_t num_blocks, size_t total_steps) {
173  /*
174  * During a typical step, the rotated rank will indicate:
175  *
176  * 0000...0001 -> block = send_step
177  * 1000...0001 -> block = send_step - 1
178  * x100...0001 -> block = send_step - 2
179  * xx10...0001 -> block = send_step - 3
180  *
181  * xxxx...xx11 -> block = send_step - log2_num_members + 1
182  * xxxx...xxx0 -> block = send_step - log2_num_members
183  */
184 
185  size_t rank_mask = (~((size_t)0)) >> (sizeof(size_t) * CHAR_BIT - log2_num_members);
186 
187  size_t step_index = send_step % log2_num_members;
188  uint32_t neighbor = sender ^ (1 << step_index);
189  if(send_step >= total_steps || neighbor == 0 || (send_step == total_steps - 1 && num_members > 1u << log2_num_members)) {
190  // printf("send_step = %d, neighbor = %d, log2(...) = %f\n",
191  // (int)send_step, (int)neighbor, log2(member_index|neighbor));
192  // fflush(stdout);
193  return std::nullopt;
194  }
195 
196  size_t rotated_rank = ((neighbor | (neighbor << log2_num_members)) >> step_index) & rank_mask;
197  // printf("send_step = %d, rotated_rank = %x\n", (int)send_step,
198  // (int)rotated_rank);
199  // fflush(stdout);
200 
201  if((rotated_rank & 1) == 0) {
202  if(send_step < log2_num_members) {
203  // printf("send_step < log2_num_members\n");
204  // fflush(stdout);
205  return std::nullopt;
206  }
207  return block_transfer{neighbor, send_step - log2_num_members};
208  }
209 
210  for(unsigned int index = 1; index < log2_num_members; index++) {
211  if(rotated_rank & (1 << index)) {
212  if(send_step + index < log2_num_members) {
213  return std::nullopt;
214  }
215  size_t block_number = min(send_step + index - log2_num_members, num_blocks - 1);
216  return block_transfer{neighbor, block_number};
217  }
218  }
219 
220  size_t block_number = min(send_step, num_blocks - 1);
221  return block_transfer{neighbor, block_number};
222 }
223 optional<schedule::block_transfer> binomial_schedule::get_vertex_incoming_transfer(
224  uint32_t vertex, size_t send_step, uint32_t num_members,
225  unsigned int log2_num_members, size_t num_blocks, size_t total_steps) {
226  size_t step_index = send_step % log2_num_members;
227  uint32_t neighbor = vertex ^ (1 << step_index);
228 
229  auto transfer = get_vertex_outgoing_transfer(neighbor, send_step, num_members,
230  log2_num_members, num_blocks, total_steps);
231  if(!transfer) return std::nullopt;
232  return block_transfer{neighbor, transfer->block_number};
233 }
234 optional<schedule::block_transfer> binomial_schedule::get_outgoing_transfer(
235  uint32_t node, size_t step, uint32_t num_members,
236  unsigned int log2_num_members, size_t num_blocks, size_t total_steps) {
237  uint32_t vertex = (node < (1u << log2_num_members))
238  ? node
239  : node + 1 - (1 << log2_num_members);
240  uint32_t intervertex_receiver = get_intervertex_receiver(
241  vertex, step, num_members, log2_num_members, num_blocks, total_steps);
242 
243  if(step >= total_steps) {
244  return std::nullopt;
245  } else if(step == total_steps - 1 && num_blocks == 1 && num_members > (1u << log2_num_members)) {
246  uint32_t intervertex_receiver = get_intervertex_receiver(vertex, step, num_members,
247  log2_num_members, num_blocks, total_steps);
248 
249  uint32_t target = get_intervertex_receiver(vertex ^ 1, step, num_members,
250  log2_num_members, num_blocks, total_steps);
251 
252  bool node_has_twin = node != 0 && vertex + (1 << log2_num_members) - 1 < num_members;
253  bool target_has_twin = target != 0 && (target >= (1u << log2_num_members) || target + (1u << log2_num_members) - 1 < num_members);
254 
255  if((node_has_twin && node == intervertex_receiver) || node == 1 || !target_has_twin)
256  return std::nullopt;
257  else
258  return block_transfer{target, 0};
259  } else if(node == intervertex_receiver && vertex != 0 && vertex + (1u << log2_num_members) - 1 < num_members) {
260  auto block = get_intravertex_block(vertex, step, num_members, log2_num_members,
261  num_blocks, total_steps);
262 
263  if(!block) return std::nullopt;
264  uint32_t twin = (node < (1u << log2_num_members))
265  ? node + (1 << log2_num_members) - 1
266  : node + 1 - (1 << log2_num_members);
267  return block_transfer{twin, *block};
268  } else {
269  if(step == total_steps - 1 && num_members > 1u << log2_num_members) {
270  if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
271  return std::nullopt;
272 
273  uint32_t twin = (node < (1u << log2_num_members))
274  ? node + (1 << log2_num_members) - 1
275  : node + 1 - (1 << log2_num_members);
276 
277  size_t s = step;
278  uint32_t r = twin;
279  while(r == twin) {
280  assert(s > 0);
281  r = get_intervertex_receiver(vertex, --s, num_members,
282  log2_num_members, num_blocks,
283  total_steps);
284  }
285  auto transfer = get_vertex_incoming_transfer(
286  vertex, s, num_members, log2_num_members, num_blocks,
287  total_steps);
288 
289  assert(transfer);
290  transfer->target = twin;
291  return transfer;
292  }
293 
294  auto transfer = get_vertex_outgoing_transfer(vertex, step, num_members,
295  log2_num_members,
296  num_blocks, total_steps);
297 
298  if(transfer) {
299  transfer->target = get_intervertex_receiver(
300  transfer->target, step, num_members, log2_num_members,
301  num_blocks, total_steps);
302  }
303  return transfer;
304  }
305 }
306 optional<schedule::block_transfer> binomial_schedule::get_incoming_transfer(
307  uint32_t node, size_t step, uint32_t num_members,
308  unsigned int log2_num_members, size_t num_blocks, size_t total_steps) {
309  uint32_t vertex = (node < (1u << log2_num_members))
310  ? node
311  : node + 1 - (1 << log2_num_members);
312  uint32_t intervertex_receiver = get_intervertex_receiver(
313  vertex, step, num_members, log2_num_members, num_blocks, total_steps);
314 
315  if(step >= total_steps) {
316  return std::nullopt;
317  } else if(step == total_steps - 1 && num_blocks == 1 && num_members > (1u << log2_num_members)) {
318  uint32_t target = get_intervertex_receiver(vertex ^ 1, step, num_members,
319  log2_num_members, num_blocks, total_steps);
320 
321  bool node_has_twin = node != 0 && vertex + (1 << log2_num_members) - 1 < num_members;
322 
323  if(target >= (1u << log2_num_members))
324  target = target + 1 - (1 << log2_num_members);
325  else if(target != 0 && target + (1 << log2_num_members) - 1 < num_members)
326  target = target + (1 << log2_num_members) - 1;
327 
328  if(!node_has_twin || node != intervertex_receiver)
329  return std::nullopt;
330  else {
331  return block_transfer{target, 0};
332  }
333  } else if(node != intervertex_receiver) {
334  auto block = get_intravertex_block(vertex, step, num_members, log2_num_members,
335  num_blocks, total_steps);
336 
337  if(!block) return std::nullopt;
338  uint32_t twin = (node < (1u << log2_num_members))
339  ? node + (1 << log2_num_members) - 1
340  : node + 1 - (1 << log2_num_members);
341  return block_transfer{twin, *block};
342  } else {
343  if(step == total_steps - 1 && num_members > 1u << log2_num_members) {
344  if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
345  return std::nullopt;
346 
347  uint32_t twin = (node < (1u << log2_num_members))
348  ? node + (1 << log2_num_members) - 1
349  : node + 1 - (1 << log2_num_members);
350 
351  auto transfer = get_outgoing_transfer(twin, step, num_members, log2_num_members,
352  num_blocks, total_steps);
353 
354  assert(transfer);
355 
356  transfer->target = twin;
357  return transfer;
358  }
359 
360  auto transfer = get_vertex_incoming_transfer(vertex, step, num_members,
361  log2_num_members,
362  num_blocks, total_steps);
363  if(transfer) {
364  uint32_t neighbor = get_intervertex_receiver(
365  transfer->target, step, num_members, log2_num_members,
366  num_blocks, total_steps);
367 
368  if(neighbor == 0)
369  transfer->target = neighbor;
370  else if(neighbor >= (1u << log2_num_members))
371  transfer->target = neighbor + 1 - (1u << log2_num_members);
372  else if(neighbor + (1u << log2_num_members) - 1 < num_members)
373  transfer->target = neighbor + (1u << log2_num_members) - 1;
374  else
375  transfer->target = neighbor;
376  }
377  return transfer;
378  }
379 }
380 uint32_t binomial_schedule::get_intervertex_receiver(uint32_t vertex, size_t step,
381  uint32_t num_members,
382  unsigned int log2_num_members,
383  size_t num_blocks,
384  size_t total_steps) {
385  // If the vertex only has one node, then no intravertex transfer can take
386  // place.
387  if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
388  return vertex;
389 
390  size_t weight = 0;
391  for(int i = 0; i < 32; i++) {
392  if(vertex & (1 << i)) weight++;
393  }
394 
395  // Compute the number of times the intravertex sender has flipped since the
396  // start of the message. We do not count the current step, because that
397  // will not affect the transfer going on now.
398  auto total_flips = [&](size_t step) {
399  size_t flips = (step / log2_num_members) * weight;
400  for(unsigned int i = 0; i < step % log2_num_members; i++) {
401  if(vertex & (1 << i)) flips++;
402  }
403  return flips;
404  };
405 
406  if(total_flips(step) % 2 == 1) {
407  return vertex + (1u << log2_num_members) - 1;
408  }
409  return vertex;
410 }
412  uint32_t vertex, size_t step, uint32_t num_members,
413  unsigned int log2_num_members, size_t num_blocks, size_t total_steps) {
414  // If the vertex only has one node, then no intravertex transfer can take
415  // place.
416  if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
417  return std::nullopt;
418 
419  size_t weight = 0;
420  for(int i = 0; i < 32; i++) {
421  if(vertex & (1 << i)) weight++;
422  }
423 
424  // Compute the number of times the intravertex sender has flipped since the
425  // start of the message. We do not count the current step, because that
426  // will not affect the transfer going on now.
427  auto total_flips = [&](size_t step) {
428  size_t flips = ((step) / log2_num_members) * weight;
429  for(unsigned int i = 0; i < (step) % log2_num_members; i++) {
430  if(vertex & (1 << i)) flips++;
431  }
432  return flips;
433  };
434 
435  size_t flips = total_flips(step);
436 
437  // The first block received triggers a flip. If we haven't gotten it yet,
438  // then clearly there can't be an intravertex transfer.
439  if(flips == 0) {
440  return std::nullopt;
441  }
442 
443  // uint32_t target = vertex;
444  // if(flips % 2 == 0) {
445  // target += (1 << log2_num_members) - 1;
446  // }
447 
448  size_t prev_receive_block_step = step - 1;
449  if(flips != total_flips(step - 1)) {
450  if(flips <= 1) return std::nullopt;
451 
452  while(total_flips(prev_receive_block_step) != flips - 2) {
453  --prev_receive_block_step;
454  }
455  }
456 
457  auto last = get_vertex_incoming_transfer(vertex, prev_receive_block_step,
458  num_members, log2_num_members,
459  num_blocks, total_steps);
460 
461  if(!last) return std::nullopt;
462  return last->block_number;
463 }
464 
465 optional<schedule::block_transfer> binomial_schedule::get_outgoing_transfer(size_t num_blocks, size_t step) const {
467  log2_num_members, num_blocks,
468  get_total_steps(num_blocks));
469 }
470 optional<schedule::block_transfer> binomial_schedule::get_incoming_transfer(size_t num_blocks, size_t step) const {
472  log2_num_members, num_blocks,
473  get_total_steps(num_blocks));
474 }
475 
476 optional<schedule::block_transfer> binomial_schedule::get_first_block(size_t num_blocks) const {
477  if(member_index == 0) return std::nullopt;
478 
479  size_t simulated_total_steps = num_members == 1u << log2_num_members
480  ? 1024 + log2_num_members - 1
481  : 1024 + log2_num_members;
482 
483  size_t step = 0;
484  optional<block_transfer> transfer;
485  while(!transfer) {
486  transfer = get_incoming_transfer(member_index, step++, num_members,
487  log2_num_members, 1024,
488  simulated_total_steps);
489  assert(step < simulated_total_steps);
490  }
491 
492  return transfer;
493 }
optional< block_transfer > get_incoming_transfer(size_t num_blocks, size_t receive_step) const
Definition: schedule.cpp:122
vector< uint32_t > get_connections() const
Definition: schedule.cpp:140
vector< uint32_t > get_connections() const
Definition: schedule.cpp:15
optional< block_transfer > get_incoming_transfer(size_t num_blocks, size_t receive_step) const
Definition: schedule.cpp:43
static optional< block_transfer > get_incoming_transfer(uint32_t node, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
Definition: schedule.cpp:306
size_t get_total_steps(size_t num_blocks) const
Definition: schedule.cpp:109
vector< uint32_t > get_connections() const
Definition: schedule.cpp:93
size_t get_total_steps(size_t num_blocks) const
Definition: schedule.cpp:30
const uint32_t num_members
Definition: schedule.hpp:13
static optional< size_t > get_intravertex_block(uint32_t vertex, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
Definition: schedule.cpp:411
optional< block_transfer > get_outgoing_transfer(size_t num_blocks, size_t send_step) const
Definition: schedule.cpp:113
const uint32_t member_index
Definition: schedule.hpp:14
optional< block_transfer > get_vertex_outgoing_transfer(size_t send_step)
static uint32_t get_intervertex_receiver(uint32_t vertex, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
Definition: schedule.cpp:380
static optional< block_transfer > get_outgoing_transfer(uint32_t node, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
Definition: schedule.cpp:234
size_t get_total_steps(size_t num_blocks) const
Definition: schedule.cpp:164
vector< uint32_t > get_connections() const
Definition: schedule.cpp:55
optional< block_transfer > get_outgoing_transfer(size_t num_blocks, size_t send_step) const
Definition: schedule.cpp:34
optional< block_transfer > get_first_block(size_t num_blocks) const
Definition: schedule.cpp:50
#define assert_always(x...)
Definition: schedule.cpp:10
optional< block_transfer > get_first_block(size_t num_blocks) const
Definition: schedule.cpp:476
optional< block_transfer > get_first_block(size_t num_blocks) const
Definition: schedule.cpp:88
optional< block_transfer > get_first_block(size_t num_blocks) const
Definition: schedule.cpp:131
optional< block_transfer > get_vertex_incoming_transfer(size_t receive_step)
size_t get_total_steps(size_t num_blocks) const
Definition: schedule.cpp:69
optional< block_transfer > get_outgoing_transfer(size_t num_blocks, size_t send_step) const
Definition: schedule.cpp:72
optional< block_transfer > get_incoming_transfer(size_t num_blocks, size_t receive_step) const
Definition: schedule.cpp:80