Derecho  0.9
Distributed systems toolkit for RDMA
view.cpp
Go to the documentation of this file.
1 #include <fstream>
2 #include <iostream>
3 #include <iterator>
4 #include <memory>
5 #include <sstream>
6 #include <string>
7 
8 #include <derecho/core/view.hpp>
9 
10 namespace derecho {
11 
12 using std::shared_ptr;
13 using std::string;
14 
15 SubView::SubView(int32_t num_members)
16  : mode(Mode::ORDERED),
17  members(num_members),
18  is_sender(num_members, 1),
19  member_ips_and_ports(num_members),
20  joined(0),
21  departed(0),
22  my_rank(-1),
23  profile("default") {}
24 
26  const std::vector<node_id_t>& members,
27  std::vector<int> is_sender,
28  const std::vector<std::tuple<ip_addr_t, uint16_t, uint16_t, uint16_t,
29  uint16_t>>& member_ips_and_ports,
30  const std::string profile)
31  : mode(mode),
32  members(members),
33  is_sender(members.size(), 1),
35  my_rank(-1),
36  profile(profile) {
37  // if the sender information is not provided, assume that all members are
38  // senders
39  if(is_sender.size()) {
40  this->is_sender = is_sender;
41  }
42 }
43 
44 int SubView::rank_of(const node_id_t& who) const {
45  for(std::size_t rank = 0; rank < members.size(); ++rank) {
46  if(members[rank] == who) {
47  return rank;
48  }
49  }
50  return -1;
51 }
52 
53 int SubView::sender_rank_of(uint32_t rank) const {
54  if(!is_sender[rank]) {
55  return -1;
56  }
57  int num = 0;
58  for(uint i = 0; i < rank; ++i) {
59  if(is_sender[i]) {
60  num++;
61  }
62  }
63  return num;
64 }
65 
66 uint32_t SubView::num_senders() const {
67  uint32_t num = 0;
68  for(const auto i : is_sender) {
69  if(i) {
70  num++;
71  }
72  }
73  return num;
74 }
75 
76 void SubView::init_joined_departed(const SubView& previous_subview) {
77  //To ensure this method is idempotent
78  joined.clear();
79  departed.clear();
80  std::set<node_id_t> prev_members(previous_subview.members.begin(),
81  previous_subview.members.end());
82  std::set<node_id_t> curr_members(members.begin(),
83  members.end());
84  std::set_difference(curr_members.begin(), curr_members.end(),
85  prev_members.begin(), prev_members.end(),
86  std::back_inserter(joined));
87  std::set_difference(prev_members.begin(), prev_members.end(),
88  curr_members.begin(), curr_members.end(),
89  std::back_inserter(departed));
90 }
91 
92 View::View(const int32_t vid, const std::vector<node_id_t>& members,
93  const std::vector<std::tuple<ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t>>& member_ips_and_ports,
94  const std::vector<char>& failed, const int32_t num_failed,
95  const std::vector<node_id_t>& joined,
96  const std::vector<node_id_t>& departed,
97  const int32_t num_members,
98  const int32_t next_unassigned_rank,
99  const std::map<subgroup_type_id_t, std::vector<subgroup_id_t>>& subgroup_ids_by_type_id,
100  const std::vector<std::vector<SubView>>& subgroup_shard_views,
101  const std::map<subgroup_id_t, uint32_t>& my_subgroups)
102  : vid(vid),
103  members(members),
105  failed(failed),
106  num_failed(num_failed),
107  joined(joined),
108  departed(departed),
109  num_members(num_members),
110  my_rank(0), // This will always get overwritten by the receiver after deserializing
111  next_unassigned_rank(next_unassigned_rank),
112  subgroup_ids_by_type_id(subgroup_ids_by_type_id),
113  subgroup_shard_views(subgroup_shard_views),
114  my_subgroups(my_subgroups) {
115  for(int rank = 0; rank < num_members; ++rank) {
116  node_id_to_rank[members[rank]] = rank;
117  }
118 }
119 
121  for(int r = 0; r < num_members; ++r) {
122  if(!failed[r]) {
123  return r;
124  }
125  }
126  return -1;
127 }
128 
129 View::View(const int32_t vid, const std::vector<node_id_t>& members,
130  const std::vector<std::tuple<ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t>>& member_ips_and_ports,
131  const std::vector<char>& failed, const std::vector<node_id_t>& joined,
132  const std::vector<node_id_t>& departed,
133  const int32_t my_rank,
134  const int32_t next_unassigned_rank,
135  const std::vector<std::type_index>& subgroup_type_order)
136  : vid(vid),
137  members(members),
139  failed(failed),
140  num_failed(0),
141  joined(joined),
142  departed(departed),
143  num_members(members.size()),
144  my_rank(my_rank),
145  next_unassigned_rank(next_unassigned_rank),
146  subgroup_type_order(subgroup_type_order) {
147  for(int rank = 0; rank < num_members; ++rank) {
148  node_id_to_rank[members[rank]] = rank;
149  }
150  for(auto c : failed) {
151  if(c) {
152  num_failed++;
153  }
154  }
155 }
156 
157 int View::rank_of(const std::tuple<ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t>& who) const {
158  for(int rank = 0; rank < num_members; ++rank) {
159  if(member_ips_and_ports[rank] == who) {
160  return rank;
161  }
162  }
163  return -1;
164 }
165 
166 int View::rank_of(const node_id_t& who) const {
167  auto it = node_id_to_rank.find(who);
168  if(it != node_id_to_rank.end()) {
169  return it->second;
170  }
171  return -1;
172 }
173 
174 SubView View::make_subview(const std::vector<node_id_t>& with_members,
175  const Mode mode,
176  const std::vector<int>& is_sender,
177  std::string profile) const {
178  // Make the profile string all uppercase so that it is effectively case-insensitive
179  std::transform(profile.begin(), profile.end(), profile.begin(), ::toupper);
180  std::vector<std::tuple<ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t>> subview_member_ips_and_ports(with_members.size());
181  for(std::size_t subview_rank = 0; subview_rank < with_members.size(); ++subview_rank) {
182  int view_rank_of_member = rank_of(with_members[subview_rank]);
183  if(view_rank_of_member == -1) {
184  // The ID wasn't found in members[]
186  }
187  subview_member_ips_and_ports[subview_rank] = member_ips_and_ports[view_rank_of_member];
188  }
189  // Note that joined and departed do not need to get initialized here; they will be initialized by ViewManager
190  return SubView(mode, with_members, is_sender, subview_member_ips_and_ports, profile);
191 }
192 
194  uint32_t shard_index) const {
195  if(shard_index >= subgroup_shard_views.at(subgroup_id).size()) {
196  return -1;
197  }
198  const SubView& shard_view = subgroup_shard_views.at(subgroup_id).at(shard_index);
199  for(std::size_t rank = 0; rank < shard_view.members.size(); ++rank) {
200  // Inefficient to call rank_of every time, but no guarantee the subgroup
201  // members will have ascending ranks
202  if(!failed[rank_of(shard_view.members[rank])]) {
203  return rank;
204  }
205  }
206  return -1;
207 }
208 
209 bool View::i_am_leader() const {
210  return (find_rank_of_leader() == my_rank); // True if I know myself to be the leader
211 }
212 
214  if(i_know_i_am_leader) {
215  return false; // I am the OLD leader
216  }
217 
218  for(int n = 0; n < my_rank; n++) {
219  for(int row = 0; row < my_rank; row++) {
220  if(!failed[n] && !gmsSST->suspected[row][n]) {
221  return false; // I'm not the new leader, or some failure suspicion hasn't fully propagated
222  }
223  }
224  }
225  i_know_i_am_leader = true;
226  return true;
227 }
228 
230  int myRank = my_rank;
231  // Merge the change lists
232  for(int n = 0; n < num_members; n++) {
233  if(gmsSST->num_changes[myRank] < gmsSST->num_changes[n]) {
234  gmssst::set(gmsSST->changes[myRank], gmsSST->changes[n],
235  gmsSST->changes.size());
236  gmssst::set(gmsSST->num_changes[myRank], gmsSST->num_changes[n]);
237  }
238 
239  if(gmsSST->num_committed[myRank] < gmsSST->num_committed[n]) // How many I know to have been committed
240  {
241  gmssst::set(gmsSST->num_committed[myRank], gmsSST->num_committed[n]);
242  }
243  }
244  bool found = false;
245  for(int n = 0; n < num_members; n++) {
246  if(failed[n]) {
247  // Make sure that the failed process is listed in the changes vector as a
248  // proposed change
249  for(int c = gmsSST->num_committed[myRank];
250  c < gmsSST->num_changes[myRank] && !found; c++) {
251  if(gmsSST->changes[myRank][c % gmsSST->changes.size()] == members[n]) {
252  // Already listed
253  found = true;
254  }
255  }
256  } else {
257  // Not failed
258  found = true;
259  }
260 
261  if(!found) {
262  gmssst::set(gmsSST->changes[myRank][gmsSST->num_changes[myRank] % gmsSST->changes.size()],
263  members[n]);
264  gmssst::increment(gmsSST->num_changes[myRank]);
265  }
266  }
267  // gmsSST->put(gmsSST->changes.get_base() - gmsSST->getBaseAddress(),
268  // gmsSST->num_acked.get_base() - gmsSST->changes.get_base());
269  /* breaking the above put statement into individual put calls, to be sure that
270  * if we were relying on any ordering guarantees, we won't run into issue when
271  * guarantees do not hold*/
272  gmsSST->put(gmsSST->changes.get_base() - gmsSST->getBaseAddress(),
273  gmsSST->joiner_ips.get_base() - gmsSST->changes.get_base());
274  gmsSST->put(gmsSST->joiner_ips.get_base() - gmsSST->getBaseAddress(),
275  gmsSST->num_changes.get_base() - gmsSST->joiner_ips.get_base());
276  gmsSST->put(gmsSST->num_changes.get_base() - gmsSST->getBaseAddress(),
277  gmsSST->num_committed.get_base() - gmsSST->num_changes.get_base());
278  gmsSST->put(gmsSST->num_committed.get_base() - gmsSST->getBaseAddress(),
279  gmsSST->num_acked.get_base() - gmsSST->num_committed.get_base());
280 }
281 
282 void View::wedge() {
283  multicast_group->wedge(); // RDMC finishes sending, stops new sends or receives in Vc
284  gmssst::set(gmsSST->wedged[my_rank], true);
285  gmsSST->put(gmsSST->wedged.get_base() - gmsSST->getBaseAddress(),
286  sizeof(gmsSST->wedged[0]));
287 }
288 
289 std::string View::debug_string() const {
290  // need to add member ips and ports and other fields
291  std::stringstream s;
292  s << "View " << vid << ": MyRank=" << my_rank << ". ";
293  s << "Members={ ";
294  for(int m = 0; m < num_members; m++) {
295  s << members[m] << " ";
296  }
297  s << "}, ";
298  string fs = (" ");
299  for(int m = 0; m < num_members; m++) {
300  fs += failed[m] ? string(" T ") : string(" F ");
301  }
302 
303  s << "Failed={" << fs << " }, num_failed=" << num_failed;
304  s << ", Departed: { ";
305  for(const node_id_t& departed_node : departed) {
306  s << departed_node << " ";
307  }
308  s << "} , Joined: { ";
309  for(const node_id_t& joined_node : joined) {
310  s << joined_node << " ";
311  }
312  s << "}" << std::endl;
313  s << "SubViews: ";
314  for(subgroup_id_t subgroup = 0; subgroup < subgroup_shard_views.size(); ++subgroup) {
315  for(uint32_t shard = 0; shard < subgroup_shard_views[subgroup].size(); ++shard) {
316  s << "Shard (" << subgroup << ", " << shard << "): Members={";
317  for(const node_id_t& member : subgroup_shard_views[subgroup][shard].members) {
318  s << member << " ";
319  }
320  s << "}, is_sender={";
321  for(uint i = 0; i < subgroup_shard_views[subgroup][shard].members.size(); ++i) {
322  if(subgroup_shard_views[subgroup][shard].is_sender[i]) {
323  s << "T ";
324  } else {
325  s << "F ";
326  }
327  }
328  s << "}. ";
329  }
330  }
331  return s.str();
332 }
333 
334 } // namespace derecho
uint32_t subgroup_id_t
Type alias for the internal Subgroup IDs generated by ViewManager.
std::vector< std::vector< SubView > > subgroup_shard_views
Maps subgroup ID -> shard number -> SubView for that subgroup/shard.
Definition: view.hpp:143
const std::vector< node_id_t > members
Node IDs of members in the current view, indexed by their SST rank.
Definition: view.hpp:99
std::vector< int > is_sender
vector selecting the senders, 0 for non-sender, non-0 for sender
Definition: view.hpp:39
View(const int32_t vid, const std::vector< node_id_t > &members, const std::vector< std::tuple< ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t >> &member_ips_and_ports, const std::vector< char > &failed, const int32_t num_failed, const std::vector< node_id_t > &joined, const std::vector< node_id_t > &departed, const int32_t num_members, const int32_t next_unassigned_rank, const std::map< subgroup_type_id_t, std::vector< subgroup_id_t >> &subgroup_ids_by_type_id, const std::vector< std::vector< SubView >> &subgroup_shard_views, const std::map< subgroup_id_t, uint32_t > &my_subgroups)
Constructor used by deserialization: constructs a View given the values of its serialized fields...
Definition: view.cpp:92
uint32_t subgroup_type_id_t
Type of the numeric ID used to refer to subgroup types within a Group; this is currently computed as ...
int32_t next_unassigned_rank
The rank of the lowest-ranked member that is not assigned to a subgroup in this View.
Definition: view.hpp:124
int rank_of(const node_id_t &who) const
Looks up the sub-view rank of a node ID.
Definition: view.cpp:44
SubView(int32_t num_members)
Creates an empty new SubView with num_members members.
Definition: view.cpp:15
int32_t my_rank
The rank of this node within the subgroup/shard, or -1 if this node is not a member of the subgroup/s...
Definition: view.hpp:49
int find_rank_of_leader() const
Returns the rank of this View&#39;s leader, based on failed[].
Definition: view.cpp:120
std::unique_ptr< MulticastGroup > multicast_group
RDMC manager object used for sending multicasts.
Definition: view.hpp:126
const std::string profile
Settings for the subview.
Definition: view.hpp:51
void increment(volatile int &member)
Thread-safe increment of an integer member of GMSTableRow; ensures there is a std::atomic_signal_fenc...
int rank_of(const std::tuple< ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t > &who) const
Looks up the SST rank of an IP address.
Definition: view.cpp:157
std::vector< node_id_t > departed
List of IDs of nodes that left since the previous view, if any.
Definition: view.hpp:112
std::vector< node_id_t > departed
List of IDs of nodes that left since the previous view, if any.
Definition: view.hpp:45
std::vector< char > failed
failed[i] is true if members[i] is considered to have failed.
Definition: view.hpp:104
void set(volatile Elem &e, const Elem &value)
Thread-safe setter for DerechoSST members; ensures there is a std::atomic_signal_fence after writing ...
std::shared_ptr< DerechoSST > gmsSST
Pointer to the SST instance used by the GMS in this View.
Definition: view.hpp:128
std::vector< std::tuple< ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t > > member_ips_and_ports
IP addresses and ports of members in this subgroup/shard, with the same indices as members...
Definition: view.hpp:41
An exception that indicates that a subgroup membership function was unable to finish executing becaus...
int32_t num_failed
Number of current outstanding failures in this view.
Definition: view.hpp:108
bool i_am_new_leader()
Determines whether this node is the new leader after a view change.
Definition: view.cpp:213
std::string ip_addr_t
Type alias for IP addresses, currently stored as strings.
bool i_am_leader() const
Definition: view.cpp:209
SubView make_subview(const std::vector< node_id_t > &with_members, const Mode mode=Mode::ORDERED, const std::vector< int > &is_sender={}, std::string profile="default") const
Constructs a SubView containing the provided subset of this View&#39;s members.
Definition: view.cpp:174
const std::vector< std::tuple< ip_addr_t, uint16_t, uint16_t, uint16_t, uint16_t > > member_ips_and_ports
IP addresses and ports (gms, rpc, sst, rdmc in order) of members in the current view, indexed by their SST rank.
Definition: view.hpp:101
void merge_changes()
Merges changes lists from other SST rows into this node&#39;s SST row.
Definition: view.cpp:229
void wedge()
Wedges the view, which means wedging both SST and DerechoGroup.
Definition: view.cpp:282
int sender_rank_of(uint32_t rank) const
Looks up the sender rank of a given member.
Definition: view.cpp:53
std::string debug_string() const
Builds a human-readable string representing the state of the view.
Definition: view.cpp:289
const int32_t vid
Sequential view ID: 0, 1, ...
Definition: view.hpp:97
uint32_t node_id_t
Type alias for Node IDs in a Derecho group.
bool i_know_i_am_leader
Definition: view.hpp:149
The subset of a View associated with a single shard, or a single subgroup if the subgroup is non-shar...
Definition: view.hpp:31
std::vector< node_id_t > members
Node IDs of members in this subgroup/shard, indexed by their order in the SST.
Definition: view.hpp:36
int subview_rank_of_shard_leader(subgroup_id_t subgroup_id, uint32_t shard_index) const
Computes the within-shard rank of a particular shard&#39;s leader, based on failed[]. ...
Definition: view.cpp:193
uint32_t num_senders() const
returns the number of senders in the subview
Definition: view.cpp:66
Mode mode
Operation mode, raw mode does not do stability and delivery.
Definition: view.hpp:34
void init_joined_departed(const SubView &previous_subview)
Initialization helper method that initializes the joined and departed lists given the previous View&#39;s...
Definition: view.cpp:76
std::vector< std::type_index > subgroup_type_order
The order of subgroup types as they were declared in the Group&#39;s template parameters.
Definition: view.hpp:133
int32_t my_rank
The rank of this node (as returned by rank_of())
Definition: view.hpp:116
std::vector< node_id_t > joined
List of IDs of nodes that joined since the previous view, if any.
Definition: view.hpp:110
std::map< node_id_t, uint32_t > node_id_to_rank
Reverse index of members[]; maps node ID -> SST rank.
Definition: view.hpp:147
std::vector< node_id_t > joined
List of IDs of nodes that joined since the previous view, if any.
Definition: view.hpp:43
const int32_t num_members
Number of members in this view.
Definition: view.hpp:114