10 #define assert_always(x...) assert(x) 12 #define assert_always(x...) if(!x){abort();} 37 if(member_index > step || block_number >= num_blocks || member_index ==
num_members - 1) {
77 size_t block_number = step % num_blocks;
78 return block_transfer{(uint32_t)(1 + step / num_blocks), block_number};
85 size_t block_number = step % num_blocks;
95 for(uint32_t i = 0; i < 32; i++) {
110 unsigned int log2_num_members = ceil(log2(
num_members));
111 return num_blocks * log2_num_members;
114 size_t stage = step / num_blocks;
119 step - stage * num_blocks};
123 size_t stage = step / num_blocks;
126 step - stage * num_blocks};
134 for(uint32_t i = 0; i < 32; i++) {
141 vector<uint32_t> ret;
143 uint32_t twin = (
member_index < (1u << log2_num_members))
151 for(
size_t i = 0; i < log2_num_members; ++i) {
153 uint32_t neighbor = vertex ^ (1 << i);
154 uint32_t neighbor_twin = neighbor + (1 << log2_num_members) - 1;
156 ret.push_back(neighbor);
158 ret.push_back(neighbor_twin);
166 return num_blocks + log2_num_members - 1;
168 return num_blocks + log2_num_members;
171 uint32_t sender,
size_t send_step, uint32_t
num_members,
172 unsigned int log2_num_members,
size_t num_blocks,
size_t total_steps) {
185 size_t rank_mask = (~((size_t)0)) >> (
sizeof(
size_t) * CHAR_BIT - log2_num_members);
187 size_t step_index = send_step % log2_num_members;
188 uint32_t neighbor = sender ^ (1 << step_index);
189 if(send_step >= total_steps || neighbor == 0 || (send_step == total_steps - 1 && num_members > 1u << log2_num_members)) {
196 size_t rotated_rank = ((neighbor | (neighbor << log2_num_members)) >> step_index) & rank_mask;
201 if((rotated_rank & 1) == 0) {
202 if(send_step < log2_num_members) {
210 for(
unsigned int index = 1; index < log2_num_members; index++) {
211 if(rotated_rank & (1 << index)) {
212 if(send_step + index < log2_num_members) {
215 size_t block_number = min(send_step + index - log2_num_members, num_blocks - 1);
220 size_t block_number = min(send_step, num_blocks - 1);
224 uint32_t vertex,
size_t send_step, uint32_t
num_members,
225 unsigned int log2_num_members,
size_t num_blocks,
size_t total_steps) {
226 size_t step_index = send_step % log2_num_members;
227 uint32_t neighbor = vertex ^ (1 << step_index);
229 auto transfer = get_vertex_outgoing_transfer(neighbor, send_step, num_members,
230 log2_num_members, num_blocks, total_steps);
231 if(!transfer)
return std::nullopt;
236 unsigned int log2_num_members,
size_t num_blocks,
size_t total_steps) {
237 uint32_t vertex = (node < (1u << log2_num_members))
239 : node + 1 - (1 << log2_num_members);
240 uint32_t intervertex_receiver = get_intervertex_receiver(
241 vertex, step, num_members, log2_num_members, num_blocks, total_steps);
243 if(step >= total_steps) {
245 }
else if(step == total_steps - 1 && num_blocks == 1 && num_members > (1u << log2_num_members)) {
246 uint32_t intervertex_receiver = get_intervertex_receiver(vertex, step, num_members,
247 log2_num_members, num_blocks, total_steps);
249 uint32_t target = get_intervertex_receiver(vertex ^ 1, step, num_members,
250 log2_num_members, num_blocks, total_steps);
252 bool node_has_twin = node != 0 && vertex + (1 << log2_num_members) - 1 < num_members;
253 bool target_has_twin = target != 0 && (target >= (1u << log2_num_members) || target + (1u << log2_num_members) - 1 <
num_members);
255 if((node_has_twin && node == intervertex_receiver) || node == 1 || !target_has_twin)
259 }
else if(node == intervertex_receiver && vertex != 0 && vertex + (1u << log2_num_members) - 1 <
num_members) {
260 auto block = get_intravertex_block(vertex, step, num_members, log2_num_members,
261 num_blocks, total_steps);
263 if(!block)
return std::nullopt;
264 uint32_t twin = (node < (1u << log2_num_members))
265 ? node + (1 << log2_num_members) - 1
266 : node + 1 - (1 << log2_num_members);
269 if(step == total_steps - 1 && num_members > 1u << log2_num_members) {
270 if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
273 uint32_t twin = (node < (1u << log2_num_members))
274 ? node + (1 << log2_num_members) - 1
275 : node + 1 - (1 << log2_num_members);
281 r = get_intervertex_receiver(vertex, --s, num_members,
282 log2_num_members, num_blocks,
285 auto transfer = get_vertex_incoming_transfer(
286 vertex, s, num_members, log2_num_members, num_blocks,
290 transfer->target = twin;
294 auto transfer = get_vertex_outgoing_transfer(vertex, step, num_members,
296 num_blocks, total_steps);
299 transfer->target = get_intervertex_receiver(
300 transfer->target, step, num_members, log2_num_members,
301 num_blocks, total_steps);
308 unsigned int log2_num_members,
size_t num_blocks,
size_t total_steps) {
309 uint32_t vertex = (node < (1u << log2_num_members))
311 : node + 1 - (1 << log2_num_members);
312 uint32_t intervertex_receiver = get_intervertex_receiver(
313 vertex, step, num_members, log2_num_members, num_blocks, total_steps);
315 if(step >= total_steps) {
317 }
else if(step == total_steps - 1 && num_blocks == 1 && num_members > (1u << log2_num_members)) {
318 uint32_t target = get_intervertex_receiver(vertex ^ 1, step, num_members,
319 log2_num_members, num_blocks, total_steps);
321 bool node_has_twin = node != 0 && vertex + (1 << log2_num_members) - 1 < num_members;
323 if(target >= (1u << log2_num_members))
324 target = target + 1 - (1 << log2_num_members);
325 else if(target != 0 && target + (1 << log2_num_members) - 1 <
num_members)
326 target = target + (1 << log2_num_members) - 1;
328 if(!node_has_twin || node != intervertex_receiver)
333 }
else if(node != intervertex_receiver) {
334 auto block = get_intravertex_block(vertex, step, num_members, log2_num_members,
335 num_blocks, total_steps);
337 if(!block)
return std::nullopt;
338 uint32_t twin = (node < (1u << log2_num_members))
339 ? node + (1 << log2_num_members) - 1
340 : node + 1 - (1 << log2_num_members);
343 if(step == total_steps - 1 && num_members > 1u << log2_num_members) {
344 if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
347 uint32_t twin = (node < (1u << log2_num_members))
348 ? node + (1 << log2_num_members) - 1
349 : node + 1 - (1 << log2_num_members);
352 num_blocks, total_steps);
356 transfer->target = twin;
360 auto transfer = get_vertex_incoming_transfer(vertex, step, num_members,
362 num_blocks, total_steps);
364 uint32_t neighbor = get_intervertex_receiver(
365 transfer->target, step, num_members, log2_num_members,
366 num_blocks, total_steps);
369 transfer->target = neighbor;
370 else if(neighbor >= (1u << log2_num_members))
371 transfer->target = neighbor + 1 - (1u << log2_num_members);
372 else if(neighbor + (1u << log2_num_members) - 1 <
num_members)
373 transfer->target = neighbor + (1u << log2_num_members) - 1;
375 transfer->target = neighbor;
382 unsigned int log2_num_members,
384 size_t total_steps) {
387 if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
391 for(
int i = 0; i < 32; i++) {
392 if(vertex & (1 << i)) weight++;
398 auto total_flips = [&](
size_t step) {
399 size_t flips = (step / log2_num_members) * weight;
400 for(
unsigned int i = 0; i < step % log2_num_members; i++) {
401 if(vertex & (1 << i)) flips++;
406 if(total_flips(step) % 2 == 1) {
407 return vertex + (1u << log2_num_members) - 1;
412 uint32_t vertex,
size_t step, uint32_t
num_members,
413 unsigned int log2_num_members,
size_t num_blocks,
size_t total_steps) {
416 if((vertex + (1u << log2_num_members) - 1) >= num_members || vertex == 0)
420 for(
int i = 0; i < 32; i++) {
421 if(vertex & (1 << i)) weight++;
427 auto total_flips = [&](
size_t step) {
428 size_t flips = ((step) / log2_num_members) * weight;
429 for(
unsigned int i = 0; i < (step) % log2_num_members; i++) {
430 if(vertex & (1 << i)) flips++;
435 size_t flips = total_flips(step);
448 size_t prev_receive_block_step = step - 1;
449 if(flips != total_flips(step - 1)) {
450 if(flips <= 1)
return std::nullopt;
452 while(total_flips(prev_receive_block_step) != flips - 2) {
453 --prev_receive_block_step;
457 auto last = get_vertex_incoming_transfer(vertex, prev_receive_block_step,
458 num_members, log2_num_members,
459 num_blocks, total_steps);
461 if(!last)
return std::nullopt;
462 return last->block_number;
467 log2_num_members, num_blocks,
472 log2_num_members, num_blocks,
479 size_t simulated_total_steps =
num_members == 1u << log2_num_members
480 ? 1024 + log2_num_members - 1
481 : 1024 + log2_num_members;
484 optional<block_transfer> transfer;
487 log2_num_members, 1024,
488 simulated_total_steps);
489 assert(step < simulated_total_steps);
optional< block_transfer > get_incoming_transfer(size_t num_blocks, size_t receive_step) const
vector< uint32_t > get_connections() const
vector< uint32_t > get_connections() const
optional< block_transfer > get_incoming_transfer(size_t num_blocks, size_t receive_step) const
static optional< block_transfer > get_incoming_transfer(uint32_t node, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
size_t get_total_steps(size_t num_blocks) const
vector< uint32_t > get_connections() const
size_t get_total_steps(size_t num_blocks) const
const uint32_t num_members
static optional< size_t > get_intravertex_block(uint32_t vertex, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
optional< block_transfer > get_outgoing_transfer(size_t num_blocks, size_t send_step) const
const uint32_t member_index
optional< block_transfer > get_vertex_outgoing_transfer(size_t send_step)
static uint32_t get_intervertex_receiver(uint32_t vertex, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
static optional< block_transfer > get_outgoing_transfer(uint32_t node, size_t step, uint32_t num_members, unsigned int log2_num_members, size_t num_blocks, size_t total_steps)
size_t get_total_steps(size_t num_blocks) const
vector< uint32_t > get_connections() const
optional< block_transfer > get_outgoing_transfer(size_t num_blocks, size_t send_step) const
optional< block_transfer > get_first_block(size_t num_blocks) const
#define assert_always(x...)
optional< block_transfer > get_first_block(size_t num_blocks) const
optional< block_transfer > get_first_block(size_t num_blocks) const
optional< block_transfer > get_first_block(size_t num_blocks) const
optional< block_transfer > get_vertex_incoming_transfer(size_t receive_step)
size_t get_total_steps(size_t num_blocks) const
optional< block_transfer > get_outgoing_transfer(size_t num_blocks, size_t send_step) const
optional< block_transfer > get_incoming_transfer(size_t num_blocks, size_t receive_step) const