pacemaker  2.0.1-9e909a5bdd
Scalable High-Availability cluster resource manager
unpack.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This source code is licensed under the GNU Lesser General Public License
5  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
6  */
7 
8 #include <crm_internal.h>
9 
10 #include <glib.h>
11 
12 #include <crm/crm.h>
13 #include <crm/services.h>
14 #include <crm/msg_xml.h>
15 #include <crm/common/xml.h>
16 
17 #include <crm/common/util.h>
18 #include <crm/pengine/rules.h>
19 #include <crm/pengine/internal.h>
20 #include <unpack.h>
21 #include <pe_status_private.h>
22 
23 CRM_TRACE_INIT_DATA(pe_status);
24 
/* Set or clear a bit in data_set->flags from a boolean cluster option.
 * If the option is absent from the config hash, the flag is left untouched,
 * so any previously established default remains in effect.
 */
#define set_config_flag(data_set, option, flag) do {                \
        const char *tmp = pe_pref(data_set->config_hash, option);   \
        if(tmp) {                                                   \
            if(crm_is_true(tmp)) {                                  \
                set_bit(data_set->flags, flag);                     \
            } else {                                                \
                clear_bit(data_set->flags, flag);                   \
            }                                                       \
        }                                                           \
    } while(0)
35 
36 gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
37  enum action_fail_response *failed, pe_working_set_t * data_set);
38 static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
39 
// Bitmask for warnings we only want to print once
/* NOTE(review): the definition below was missing from the truncated source;
 * pe_warn_once() (used later in this file) records printed warnings here. */
uint32_t pe_wo = 0;
43 static gboolean
44 is_dangling_container_remote_node(node_t *node)
45 {
46  /* we are looking for a remote-node that was supposed to be mapped to a
47  * container resource, but all traces of that container have disappeared
48  * from both the config and the status section. */
49  if (is_remote_node(node) &&
50  node->details->remote_rsc &&
51  node->details->remote_rsc->container == NULL &&
53  return TRUE;
54  }
55 
56  return FALSE;
57 }
58 
59 
67 void
68 pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
69 {
70  CRM_CHECK(node, return);
71 
72  /* A guest node is fenced by marking its container as failed */
73  if (is_container_remote_node(node)) {
74  resource_t *rsc = node->details->remote_rsc->container;
75 
76  if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
77  if (!is_set(rsc->flags, pe_rsc_managed)) {
78  crm_notice("Not fencing guest node %s "
79  "(otherwise would because %s): "
80  "its guest resource %s is unmanaged",
81  node->details->uname, reason, rsc->id);
82  } else {
83  crm_warn("Guest node %s will be fenced "
84  "(by recovering its guest resource %s): %s",
85  node->details->uname, rsc->id, reason);
86 
87  /* We don't mark the node as unclean because that would prevent the
88  * node from running resources. We want to allow it to run resources
89  * in this transition if the recovery succeeds.
90  */
91  node->details->remote_requires_reset = TRUE;
93  }
94  }
95 
96  } else if (is_dangling_container_remote_node(node)) {
97  crm_info("Cleaning up dangling connection for guest node %s: "
98  "fencing was already done because %s, "
99  "and guest resource no longer exists",
100  node->details->uname, reason);
102 
103  } else if (is_baremetal_remote_node(node)) {
104  resource_t *rsc = node->details->remote_rsc;
105 
106  if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
107  crm_notice("Not fencing remote node %s "
108  "(otherwise would because %s): connection is unmanaged",
109  node->details->uname, reason);
110  } else if(node->details->remote_requires_reset == FALSE) {
111  node->details->remote_requires_reset = TRUE;
112  crm_warn("Remote node %s %s: %s",
113  node->details->uname,
114  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
115  reason);
116  }
117  node->details->unclean = TRUE;
118  pe_fence_op(node, NULL, TRUE, reason, data_set);
119 
120  } else if (node->details->unclean) {
121  crm_trace("Cluster node %s %s because %s",
122  node->details->uname,
123  pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
124  reason);
125 
126  } else {
127  crm_warn("Cluster node %s %s: %s",
128  node->details->uname,
129  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
130  reason);
131  node->details->unclean = TRUE;
132  pe_fence_op(node, NULL, TRUE, reason, data_set);
133  }
134 }
135 
// @TODO xpaths can't handle templates, rules, or id-refs

// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR                   \
    "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'"       \
    "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') "    \
    "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"

/* unfencing in rsc_defaults or any resource
 * (union of two absolute paths: any meta-attribute set under a resource, or
 * the rsc_defaults meta-attribute set, containing the nvpair above)
 */
#define XPATH_ENABLE_UNFENCING \
    "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES   \
    "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR                         \
    "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG  \
    "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
150 
151 static
152 void set_if_xpath(unsigned long long flag, const char *xpath,
153  pe_working_set_t *data_set)
154 {
155  xmlXPathObjectPtr result = NULL;
156 
157  if (is_not_set(data_set->flags, flag)) {
158  result = xpath_search(data_set->input, xpath);
159  if (result && (numXpathResults(result) > 0)) {
160  set_bit(data_set->flags, flag);
161  }
162  freeXpathObject(result);
163  }
164 }
165 
166 gboolean
167 unpack_config(xmlNode * config, pe_working_set_t * data_set)
168 {
169  const char *value = NULL;
170  GHashTable *config_hash = crm_str_table_new();
171 
172  data_set->config_hash = config_hash;
173 
174  unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
175  CIB_OPTIONS_FIRST, FALSE, data_set->now);
176 
177  verify_pe_options(data_set->config_hash);
178 
179  set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
180  if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
181  crm_info("Startup probes: disabled (dangerous)");
182  }
183 
184  value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
185  if (value && crm_is_true(value)) {
186  crm_notice("Watchdog will be used via SBD if fencing is required");
188  }
189 
190  /* Set certain flags via xpath here, so they can be used before the relevant
191  * configuration sections are unpacked.
192  */
193  set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
194 
195  value = pe_pref(data_set->config_hash, "stonith-timeout");
196  data_set->stonith_timeout = crm_get_msec(value);
197  crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
198 
199  set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
200  crm_debug("STONITH of failed nodes is %s",
201  is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
202 
203  data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
204  if (!strcmp(data_set->stonith_action, "poweroff")) {
206  "Support for stonith-action of 'poweroff' is deprecated "
207  "and will be removed in a future release (use 'off' instead)");
208  data_set->stonith_action = "off";
209  }
210  crm_trace("STONITH will %s nodes", data_set->stonith_action);
211 
212  set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
213  crm_debug("Concurrent fencing is %s",
214  is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
215 
216  set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
217  crm_debug("Stop all active resources: %s",
218  is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
219 
220  set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
221  if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
222  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
223  }
224 
225  value = pe_pref(data_set->config_hash, "no-quorum-policy");
226 
227  if (safe_str_eq(value, "ignore")) {
229 
230  } else if (safe_str_eq(value, "freeze")) {
232 
233  } else if (safe_str_eq(value, "suicide")) {
234  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
235  int do_panic = 0;
236 
238  &do_panic);
239  if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
241  } else {
242  crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
243  data_set->no_quorum_policy = no_quorum_stop;
244  }
245  } else {
246  crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
247  data_set->no_quorum_policy = no_quorum_stop;
248  }
249 
250  } else {
251  data_set->no_quorum_policy = no_quorum_stop;
252  }
253 
254  switch (data_set->no_quorum_policy) {
255  case no_quorum_freeze:
256  crm_debug("On loss of quorum: Freeze resources");
257  break;
258  case no_quorum_stop:
259  crm_debug("On loss of quorum: Stop ALL resources");
260  break;
261  case no_quorum_suicide:
262  crm_notice("On loss of quorum: Fence all remaining nodes");
263  break;
264  case no_quorum_ignore:
265  crm_notice("On loss of quorum: Ignore");
266  break;
267  }
268 
269  set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
270  crm_trace("Orphan resources are %s",
271  is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
272 
273  set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
274  crm_trace("Orphan resource actions are %s",
275  is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
276 
277  set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
278  crm_trace("Stopped resources are removed from the status section: %s",
279  is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
280 
281  set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
282  crm_trace("Maintenance mode: %s",
283  is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
284 
285  set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
286  crm_trace("Start failures are %s",
287  is_set(data_set->flags,
288  pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
289 
290  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
291  set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
292  }
293  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
294  crm_trace("Unseen nodes will be fenced");
295  } else {
296  pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
297  }
298 
299  node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
300  node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
301  node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
302 
303  crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
304  pe_pref(data_set->config_hash, "node-health-red"),
305  pe_pref(data_set->config_hash, "node-health-yellow"),
306  pe_pref(data_set->config_hash, "node-health-green"));
307 
308  data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
309  crm_trace("Placement strategy: %s", data_set->placement_strategy);
310 
311  return TRUE;
312 }
313 
314 static void
315 destroy_digest_cache(gpointer ptr)
316 {
317  op_digest_cache_t *data = ptr;
318 
319  free_xml(data->params_all);
320  free_xml(data->params_secure);
321  free_xml(data->params_restart);
322 
323  free(data->digest_all_calc);
324  free(data->digest_restart_calc);
325  free(data->digest_secure_calc);
326 
327  free(data);
328 }
329 
/*!
 * \brief Create a node object and add it to the working set's node list
 *
 * \param[in]     id        Node ID (CIB id attribute); stored by reference
 * \param[in]     uname     Node name; stored by reference
 * \param[in]     type      Node type ("remote", "member", NULL, or other)
 * \param[in]     score     Initial node weight as a score string
 * \param[in,out] data_set  Working set whose node list gains the new node
 *
 * \return Newly allocated node object, or NULL on allocation failure
 */
node_t *
pe_create_node(const char *id, const char *uname, const char *type,
               const char *score, pe_working_set_t * data_set)
{
    node_t *new_node = NULL;

    if (pe_find_node(data_set->nodes, uname) != NULL) {
        crm_config_warn("Detected multiple node entries with uname=%s"
                        " - this is rarely intended", uname);
    }

    new_node = calloc(1, sizeof(node_t));
    if (new_node == NULL) {
        return NULL;
    }

    new_node->weight = char2score(score);
    new_node->fixed = FALSE;
    new_node->details = calloc(1, sizeof(struct pe_node_shared_s));

    if (new_node->details == NULL) {
        free(new_node);
        return NULL;
    }

    crm_trace("Creating node for entry %s/%s", uname, id);
    new_node->details->id = id;
    new_node->details->uname = uname;
    new_node->details->online = FALSE;
    new_node->details->shutdown = FALSE;
    new_node->details->rsc_discovery_enabled = TRUE;
    new_node->details->running_rsc = NULL;
    new_node->details->type = node_ping;  // default if type is unrecognized

    if (safe_str_eq(type, "remote")) {
        new_node->details->type = node_remote;
        /* NOTE(review): restored — record that the cluster contains remote
         * nodes; this line was missing from the truncated source */
        set_bit(data_set->flags, pe_flag_have_remote_nodes);

    } else if ((type == NULL) || safe_str_eq(type, "member")) {
        new_node->details->type = node_member;
    }

    new_node->details->attrs = crm_str_table_new();

    if (is_remote_node(new_node)) {
        g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
                            strdup("remote"));
    } else {
        g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
                            strdup("cluster"));
    }

    new_node->details->utilization = crm_str_table_new();

    new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash,
                                                            g_str_equal, free,
                                                            destroy_digest_cache);

    // keep the node list sorted by uname
    data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
    return new_node;
}
390 
391 bool
392 remote_id_conflict(const char *remote_name, pe_working_set_t *data)
393 {
394  bool match = FALSE;
395 #if 1
396  pe_find_resource(data->resources, remote_name);
397 #else
398  if (data->name_check == NULL) {
399  data->name_check = g_hash_table_new(crm_str_hash, g_str_equal);
400  for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) {
401  const char *id = ID(xml_rsc);
402 
403  /* avoiding heap allocation here because we know the duration of this hashtable allows us to */
404  g_hash_table_insert(data->name_check, (char *) id, (char *) id);
405  }
406  }
407  if (g_hash_table_lookup(data->name_check, remote_name)) {
408  match = TRUE;
409  }
410 #endif
411  if (match) {
412  crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
413  return NULL;
414  }
415 
416  return match;
417 }
418 
419 
420 static const char *
421 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
422 {
423  xmlNode *attr_set = NULL;
424  xmlNode *attr = NULL;
425 
426  const char *container_id = ID(xml_obj);
427  const char *remote_name = NULL;
428  const char *remote_server = NULL;
429  const char *remote_port = NULL;
430  const char *connect_timeout = "60s";
431  const char *remote_allow_migrate=NULL;
432  const char *container_managed = NULL;
433 
434  for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) {
435  if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
436  continue;
437  }
438 
439  for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) {
440  const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
441  const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
442 
444  remote_name = value;
445  } else if (safe_str_eq(name, "remote-addr")) {
446  remote_server = value;
447  } else if (safe_str_eq(name, "remote-port")) {
448  remote_port = value;
449  } else if (safe_str_eq(name, "remote-connect-timeout")) {
450  connect_timeout = value;
451  } else if (safe_str_eq(name, "remote-allow-migrate")) {
452  remote_allow_migrate=value;
453  } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
454  container_managed = value;
455  }
456  }
457  }
458 
459  if (remote_name == NULL) {
460  return NULL;
461  }
462 
463  if (remote_id_conflict(remote_name, data)) {
464  return NULL;
465  }
466 
467  pe_create_remote_xml(parent, remote_name, container_id,
468  remote_allow_migrate, container_managed,
469  connect_timeout, remote_server, remote_port);
470  return remote_name;
471 }
472 
473 static void
474 handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
475 {
476  if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
477  /* Ignore fencing for remote nodes that don't have a connection resource
478  * associated with them. This happens when remote node entries get left
479  * in the nodes section after the connection resource is removed.
480  */
481  return;
482  }
483 
484  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
485  // All nodes are unclean until we've seen their status entry
486  new_node->details->unclean = TRUE;
487 
488  } else {
489  // Blind faith ...
490  new_node->details->unclean = FALSE;
491  }
492 
493  /* We need to be able to determine if a node's status section
494  * exists or not separate from whether the node is unclean. */
495  new_node->details->unseen = TRUE;
496 }
497 
498 gboolean
499 unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
500 {
501  xmlNode *xml_obj = NULL;
502  node_t *new_node = NULL;
503  const char *id = NULL;
504  const char *uname = NULL;
505  const char *type = NULL;
506  const char *score = NULL;
507 
508  for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
509  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
510  new_node = NULL;
511 
512  id = crm_element_value(xml_obj, XML_ATTR_ID);
513  uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
514  type = crm_element_value(xml_obj, XML_ATTR_TYPE);
515  score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
516  crm_trace("Processing node %s/%s", uname, id);
517 
518  if (id == NULL) {
519  crm_config_err("Must specify id tag in <node>");
520  continue;
521  }
522  new_node = pe_create_node(id, uname, type, score, data_set);
523 
524  if (new_node == NULL) {
525  return FALSE;
526  }
527 
528 /* if(data_set->have_quorum == FALSE */
529 /* && data_set->no_quorum_policy == no_quorum_stop) { */
530 /* /\* start shutting resources down *\/ */
531 /* new_node->weight = -INFINITY; */
532 /* } */
533 
534  handle_startup_fencing(data_set, new_node);
535 
536  add_node_attrs(xml_obj, new_node, FALSE, data_set);
537  unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
538  new_node->details->utilization, NULL, FALSE, data_set->now);
539 
540  crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
541  }
542  }
543 
544  if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
545  crm_info("Creating a fake local node");
546  pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
547  data_set);
548  }
549 
550  return TRUE;
551 }
552 
/*!
 * \internal
 * \brief Link a resource to its container resource, if any
 *
 * For collective resources, recurse into the children; only primitives carry
 * the container meta-attribute themselves.
 *
 * \param[in,out] rsc       Resource to process
 * \param[in,out] data_set  Working set whose resource list is searched
 */
static void
setup_container(resource_t * rsc, pe_working_set_t * data_set)
{
    const char *container_id = NULL;
    resource_t *container = NULL;
    GListPtr iter = NULL;

    if (rsc->children) {
        for (iter = rsc->children; iter != NULL; iter = iter->next) {
            setup_container((resource_t *) iter->data, data_set);
        }
        return;
    }

    container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
    if ((container_id == NULL) || safe_str_eq(container_id, rsc->id)) {
        return;  // no container, or (bogus) self-reference
    }

    container = pe_find_resource(data_set->resources, container_id);
    if (container == NULL) {
        pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
        return;
    }

    rsc->container = container;
    set_bit(container->flags, pe_rsc_is_container);
    container->fillers = g_list_append(container->fillers, rsc);
    pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
}
583 
/*!
 * \brief Create remote node entries from resource configuration
 *
 * Scans the resources section for remote-node definitions and adds the
 * corresponding nodes to the working set, BEFORE resources are unpacked.
 *
 * \param[in]     xml_resources  XML of the CIB resources section
 * \param[in,out] data_set       Working set to add remote nodes to
 *
 * \return TRUE (this function currently cannot fail)
 */
gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
    xmlNode *xml_obj = NULL;

    /* generate remote nodes from resource config before unpacking resources */
    for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
        const char *new_node_id = NULL;

        /* first check if this is a bare metal remote node. Bare metal remote nodes
         * are defined as a resource primitive only. */
        if (xml_contains_remote_node(xml_obj)) {
            new_node_id = ID(xml_obj);
            /* The "pe_find_node" check is here to make sure we don't iterate over
             * an expanded node that has already been added to the node list. */
            if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj));
                pe_create_node(new_node_id, new_node_id, "remote", NULL,
                               data_set);
            }
            continue;
        }

        /* Now check for guest remote nodes.
         * guest remote nodes are defined within a resource primitive.
         * Example1: a vm resource might be configured as a remote node.
         * Example2: a vm resource might be configured within a group to be a remote node.
         * Note: right now we only support guest remote nodes in as a standalone primitive
         * or a primitive within a group. No cloned primitives can be a guest remote node
         * right now */
        if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
            /* expands a metadata defined remote resource into the xml config
             * as an actual rsc primitive to be unpacked later. */
            new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);

            if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj));
                pe_create_node(new_node_id, new_node_id, "remote", NULL,
                               data_set);
            }
            continue;

        } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
            xmlNode *xml_obj2 = NULL;
            /* search through a group to see if any of the primitive contain a remote node. */
            for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) {

                new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);

                if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                    crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj));
                    pe_create_node(new_node_id, new_node_id, "remote", NULL,
                                   data_set);
                }
            }
        }
    }
    return TRUE;
}
643 
644 
645 /* Call this after all the nodes and resources have been
646  * unpacked, but before the status section is read.
647  *
648  * A remote node's online status is reflected by the state
649  * of the remote node's connection resource. We need to link
650  * the remote node to this connection resource so we can have
651  * easy access to the connection resource during the PE calculations.
652  */
/*!
 * \internal
 * \brief Link a remote connection resource to its remote node
 *
 * A remote node's online status is reflected by the state of its connection
 * resource, so the node object needs a pointer to that resource. Call this
 * after nodes and resources have been unpacked but before the status section
 * is read.
 *
 * \param[in,out] data_set  Working set containing the node list
 * \param[in,out] new_rsc   Resource that may be a remote connection
 */
static void
link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
{
    node_t *remote_node = NULL;

    if (new_rsc->is_remote_node == FALSE) {
        return;
    }

    if (is_set(data_set->flags, pe_flag_quick_location)) {
        /* remote_nodes and remote_resources are not linked in quick location calculations */
        return;
    }

    print_resource(LOG_TRACE, "Linking remote-node connection resource, ", new_rsc, FALSE);

    remote_node = pe_find_node(data_set->nodes, new_rsc->id);
    CRM_CHECK(remote_node != NULL, return;);

    remote_node->details->remote_rsc = new_rsc;

    if (new_rsc->container != NULL) {
        /* At this point we know if the remote node is a container or baremetal
         * remote node, update the #kind attribute if a container is involved */
        g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
                             strdup("container"));
    } else {
        /* If this is a baremetal remote-node (no container resource
         * associated with it) then we need to handle startup fencing the same way
         * as cluster nodes. */
        handle_startup_fencing(data_set, remote_node);
    }
}
685 
/*!
 * \internal
 * \brief GHashTable value-destroy callback for a configuration tag
 *
 * \param[in] data  Tag to free (a tag_t, may be NULL)
 */
static void
destroy_tag(gpointer data)
{
    tag_t *tag = data;

    if (tag == NULL) {
        return;
    }

    free(tag->id);
    g_list_free_full(tag->refs, free);
    free(tag);
}
697 
/*!
 * \brief Unpack the CIB resources section into the working set
 *
 * Records template IDs, unpacks each resource definition, links resources to
 * their containers and remote nodes, and sorts the result by priority.
 *
 * \param[in]     xml_resources  XML of the CIB resources section
 * \param[in,out] data_set       Working set to populate
 *
 * \return TRUE (individual unpack failures are logged, not propagated)
 */
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
{
    xmlNode *xml_obj = NULL;
    GListPtr gIter = NULL;

    data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash,
                                                        g_str_equal, free,
                                                        destroy_tag);

    for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
        resource_t *new_rsc = NULL;

        if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
            const char *template_id = ID(xml_obj);

            if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
                                                            template_id, NULL, NULL) == FALSE) {
                /* Record the template's ID for the knowledge of its existence anyway. */
                g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
            }
            continue;
        }

        crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
        if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
            data_set->resources = g_list_append(data_set->resources, new_rsc);
            print_resource(LOG_TRACE, "Added ", new_rsc, FALSE);

        } else {
            crm_config_err("Failed unpacking %s %s",
                           crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
            if (new_rsc != NULL && new_rsc->fns != NULL) {
                // free whatever partial object common_unpack left behind
                new_rsc->fns->free(new_rsc);
            }
        }
    }

    /* second pass: containers and remote nodes can only be linked once all
     * resources exist */
    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        setup_container(rsc, data_set);
        link_rsc2remotenode(data_set, rsc);
    }

    data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
    if (is_set(data_set->flags, pe_flag_quick_location)) {
        /* Ignore */

    } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
               && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {

        crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
        crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
        crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
    }

    return TRUE;
}
769 
/*!
 * \brief Unpack the CIB tags section into the working set
 *
 * Each valid <tag> becomes an entry in data_set->tags, with its <obj_ref>
 * children recorded as references.
 *
 * \param[in]     xml_tags  XML of the tags section
 * \param[in,out] data_set  Working set whose tag table is created and filled
 *
 * \return FALSE if a tag reference could not be added, otherwise TRUE
 */
gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
    xmlNode *tag_xml = NULL;

    data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
                                           destroy_tag);

    for (tag_xml = __xml_first_child(xml_tags); tag_xml != NULL;
         tag_xml = __xml_next_element(tag_xml)) {

        xmlNode *ref_xml = NULL;
        const char *tag_id = NULL;

        if (crm_str_eq((const char *)tag_xml->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
            continue;  // not a <tag> element
        }

        tag_id = ID(tag_xml);
        if (tag_id == NULL) {
            crm_config_err("Failed unpacking %s: %s should be specified",
                           crm_element_name(tag_xml), XML_ATTR_ID);
            continue;
        }

        for (ref_xml = __xml_first_child(tag_xml); ref_xml != NULL;
             ref_xml = __xml_next_element(ref_xml)) {

            const char *obj_ref = NULL;

            if (crm_str_eq((const char *)ref_xml->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
                continue;  // not an <obj_ref> element
            }

            obj_ref = ID(ref_xml);
            if (obj_ref == NULL) {
                crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
                               crm_element_name(ref_xml), tag_id, XML_ATTR_ID);
                continue;
            }

            if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
                return FALSE;
            }
        }
    }

    return TRUE;
}
813 
814 /* The ticket state section:
815  * "/cib/status/tickets/ticket_state" */
/*!
 * \internal
 * \brief Unpack one /cib/status/tickets/ticket_state entry
 *
 * Creates the ticket object if it doesn't already exist, copies all XML
 * attributes (except id) into the ticket's state table, and derives the
 * granted/last-granted/standby fields from that table.
 *
 * \param[in]     xml_ticket  One ticket_state XML element
 * \param[in,out] data_set    Working set whose ticket table is updated
 *
 * \return FALSE if the ticket ID is missing/empty or the ticket could not be
 *         created, otherwise TRUE
 */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
    const char *ticket_id = NULL;
    const char *granted = NULL;
    const char *last_granted = NULL;
    const char *standby = NULL;
    xmlAttrPtr xIter = NULL;

    ticket_t *ticket = NULL;

    ticket_id = ID(xml_ticket);
    if (ticket_id == NULL || strlen(ticket_id) == 0) {
        return FALSE;
    }

    crm_trace("Processing ticket state for %s", ticket_id);

    ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
    if (ticket == NULL) {
        ticket = ticket_new(ticket_id, data_set);
        if (ticket == NULL) {
            return FALSE;
        }
    }

    /* copy every XML attribute (except id) into the ticket's state table */
    for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
        const char *prop_name = (const char *)xIter->name;
        const char *prop_value = crm_element_value(xml_ticket, prop_name);

        if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
            continue;
        }
        /* NOTE(review): assumes crm_element_value() is non-NULL for an
         * attribute obtained from the element's own property list — confirm */
        g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
    }

    granted = g_hash_table_lookup(ticket->state, "granted");
    if (granted && crm_is_true(granted)) {
        ticket->granted = TRUE;
        crm_info("We have ticket '%s'", ticket->id);
    } else {
        ticket->granted = FALSE;
        crm_info("We do not have ticket '%s'", ticket->id);
    }

    last_granted = g_hash_table_lookup(ticket->state, "last-granted");
    if (last_granted) {
        ticket->last_granted = crm_parse_int(last_granted, 0);
    }

    standby = g_hash_table_lookup(ticket->state, "standby");
    if (standby && crm_is_true(standby)) {
        ticket->standby = TRUE;
        if (ticket->granted) {
            crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
        }
    } else {
        ticket->standby = FALSE;
    }

    crm_trace("Done with ticket state for %s", ticket_id);

    return TRUE;
}
880 
881 static gboolean
882 unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
883 {
884  xmlNode *xml_obj = NULL;
885 
886  for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
887  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
888  continue;
889  }
890  unpack_ticket_state(xml_obj, data_set);
891  }
892 
893  return TRUE;
894 }
895 
896 static void
897 unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
898 {
899  const char *resource_discovery_enabled = NULL;
900  xmlNode *attrs = NULL;
901  resource_t *rsc = NULL;
902  const char *shutdown = NULL;
903 
904  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
905  return;
906  }
907 
908  if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
909  return;
910  }
911  crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);
912 
913  this_node->details->remote_maintenance =
915 
916  rsc = this_node->details->remote_rsc;
917  if (this_node->details->remote_requires_reset == FALSE) {
918  this_node->details->unclean = FALSE;
919  this_node->details->unseen = FALSE;
920  }
921  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
922  add_node_attrs(attrs, this_node, TRUE, data_set);
923 
924  shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
925  if (shutdown != NULL && safe_str_neq("0", shutdown)) {
926  crm_info("Node %s is shutting down", this_node->details->uname);
927  this_node->details->shutdown = TRUE;
928  if (rsc) {
930  }
931  }
932 
933  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
934  crm_info("Node %s is in standby-mode", this_node->details->uname);
935  this_node->details->standby = TRUE;
936  }
937 
938  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
939  (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
940  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
941  this_node->details->maintenance = TRUE;
942  }
943 
944  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
945  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
946  if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
947  crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
949  } else {
950  /* if we're here, this is either a baremetal node and fencing is enabled,
951  * or this is a container node which we don't care if fencing is enabled
952  * or not on. container nodes are 'fenced' by recovering the container resource
953  * regardless of whether fencing is enabled. */
954  crm_info("Node %s has resource discovery disabled", this_node->details->uname);
955  this_node->details->rsc_discovery_enabled = FALSE;
956  }
957  }
958 }
959 
960 static bool
961 unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
962 {
963  bool changed = false;
964  xmlNode *lrm_rsc = NULL;
965 
966  for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
967  const char *id = NULL;
968  const char *uname = NULL;
969  node_t *this_node = NULL;
970  bool process = FALSE;
971 
972  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
973  continue;
974  }
975 
976  id = crm_element_value(state, XML_ATTR_ID);
977  uname = crm_element_value(state, XML_ATTR_UNAME);
978  this_node = pe_find_node_any(data_set->nodes, id, uname);
979 
980  if (this_node == NULL) {
981  crm_info("Node %s is unknown", id);
982  continue;
983 
984  } else if (this_node->details->unpacked) {
985  crm_info("Node %s is already processed", id);
986  continue;
987 
988  } else if (is_remote_node(this_node) == FALSE && is_set(data_set->flags, pe_flag_stonith_enabled)) {
989  // A redundant test, but preserves the order for regression tests
990  process = TRUE;
991 
992  } else if (is_remote_node(this_node)) {
993  bool check = FALSE;
994  resource_t *rsc = this_node->details->remote_rsc;
995 
996  if(fence) {
997  check = TRUE;
998 
999  } else if(rsc == NULL) {
1000  /* Not ready yet */
1001 
1002  } else if (is_container_remote_node(this_node)
1003  && rsc->role == RSC_ROLE_STARTED
1004  && rsc->container->role == RSC_ROLE_STARTED) {
1005  /* Both the connection and the underlying container
1006  * need to be known 'up' before we volunterily process
1007  * resources inside it
1008  */
1009  check = TRUE;
1010  crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
1011 
1012  } else if (is_container_remote_node(this_node) == FALSE
1013  && rsc->role == RSC_ROLE_STARTED) {
1014  check = TRUE;
1015  crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
1016  }
1017 
1018  if (check) {
1019  determine_remote_online_status(data_set, this_node);
1020  unpack_handle_remote_attrs(this_node, state, data_set);
1021  process = TRUE;
1022  }
1023 
1024  } else if (this_node->details->online) {
1025  process = TRUE;
1026 
1027  } else if (fence) {
1028  process = TRUE;
1029  }
1030 
1031  if(process) {
1032  crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
1033  fence?"un":"", is_remote_node(this_node)?" remote":"",
1034  this_node->details->uname);
1035  changed = TRUE;
1036  this_node->details->unpacked = TRUE;
1037 
1038  lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
1039  lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
1040  unpack_lrm_resources(this_node, lrm_rsc, data_set);
1041  }
1042  }
1043  return changed;
1044 }
1045 
1046 /* remove nodes that are down, stopping */
1047 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1048 /* anything else? */
1049 gboolean
1050 unpack_status(xmlNode * status, pe_working_set_t * data_set)
1051 {
1052  const char *id = NULL;
1053  const char *uname = NULL;
1054 
1055  xmlNode *state = NULL;
1056  node_t *this_node = NULL;
1057 
1058  crm_trace("Beginning unpack");
1059 
1060  if (data_set->tickets == NULL) {
1061  data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal,
1062  free, destroy_ticket);
1063  }
1064 
1065  for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
1066  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
1067  unpack_tickets_state((xmlNode *) state, data_set);
1068 
1069  } else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
1070  xmlNode *attrs = NULL;
1071  const char *resource_discovery_enabled = NULL;
1072 
1073  id = crm_element_value(state, XML_ATTR_ID);
1074  uname = crm_element_value(state, XML_ATTR_UNAME);
1075  this_node = pe_find_node_any(data_set->nodes, id, uname);
1076 
1077  if (uname == NULL) {
1078  /* error */
1079  continue;
1080 
1081  } else if (this_node == NULL) {
1082  crm_config_warn("Node %s in status section no longer exists", uname);
1083  continue;
1084 
1085  } else if (is_remote_node(this_node)) {
1086  /* online state for remote nodes is determined by the
1087  * rsc state after all the unpacking is done. we do however
1088  * need to mark whether or not the node has been fenced as this plays
1089  * a role during unpacking cluster node resource state */
1090  this_node->details->remote_was_fenced =
1092  continue;
1093  }
1094 
1095  crm_trace("Processing node id=%s, uname=%s", id, uname);
1096 
1097  /* Mark the node as provisionally clean
1098  * - at least we have seen it in the current cluster's lifetime
1099  */
1100  this_node->details->unclean = FALSE;
1101  this_node->details->unseen = FALSE;
1102  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
1103  add_node_attrs(attrs, this_node, TRUE, data_set);
1104 
1105  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
1106  crm_info("Node %s is in standby-mode", this_node->details->uname);
1107  this_node->details->standby = TRUE;
1108  }
1109 
1110  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
1111  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
1112  this_node->details->maintenance = TRUE;
1113  }
1114 
1115  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
1116  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
1117  crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
1119  }
1120 
1121  crm_trace("determining node state");
1122  determine_online_status(state, this_node, data_set);
1123 
1124  if (is_not_set(data_set->flags, pe_flag_have_quorum)
1125  && this_node->details->online
1126  && (data_set->no_quorum_policy == no_quorum_suicide)) {
1127  /* Everything else should flow from this automatically
1128  * At least until the PE becomes able to migrate off healthy resources
1129  */
1130  pe_fence_node(data_set, this_node, "cluster does not have quorum");
1131  }
1132  }
1133  }
1134 
1135 
1136  while(unpack_node_loop(status, FALSE, data_set)) {
1137  crm_trace("Start another loop");
1138  }
1139 
1140  // Now catch any nodes we didn't see
1141  unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
1142 
1143  /* Now that we know where resources are, we can schedule stops of containers
1144  * with failed bundle connections
1145  */
1146  if (data_set->stop_needed != NULL) {
1147  for (GList *item = data_set->stop_needed; item; item = item->next) {
1148  pe_resource_t *container = item->data;
1149  pe_node_t *node = pe__current_node(container);
1150 
1151  if (node) {
1152  stop_action(container, node, FALSE);
1153  }
1154  }
1155  g_list_free(data_set->stop_needed);
1156  data_set->stop_needed = NULL;
1157  }
1158 
1159  for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1160  node_t *this_node = gIter->data;
1161 
1162  if (this_node == NULL) {
1163  continue;
1164  } else if(is_remote_node(this_node) == FALSE) {
1165  continue;
1166  } else if(this_node->details->unpacked) {
1167  continue;
1168  }
1169  determine_remote_online_status(data_set, this_node);
1170  }
1171 
1172  return TRUE;
1173 }
1174 
1175 static gboolean
1176 determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1177  node_t * this_node)
1178 {
1179  gboolean online = FALSE;
1180  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1181  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1182  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1183  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1184 
1185  if (!crm_is_true(in_cluster)) {
1186  crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
1187 
1188  } else if (safe_str_eq(is_peer, ONLINESTATUS)) {
1189  if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1190  online = TRUE;
1191  } else {
1192  crm_debug("Node is not ready to run resources: %s", join);
1193  }
1194 
1195  } else if (this_node->details->expected_up == FALSE) {
1196  crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster));
1197  crm_trace("\tis_peer=%s, join=%s, expected=%s",
1198  crm_str(is_peer), crm_str(join), crm_str(exp_state));
1199 
1200  } else {
1201  /* mark it unclean */
1202  pe_fence_node(data_set, this_node, "peer is unexpectedly down");
1203  crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
1204  crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
1205  }
1206  return online;
1207 }
1208 
1209 static gboolean
1210 determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1211  node_t * this_node)
1212 {
1213  gboolean online = FALSE;
1214  gboolean do_terminate = FALSE;
1215  bool crmd_online = FALSE;
1216  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1217  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1218  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1219  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1220  const char *terminate = pe_node_attribute_raw(this_node, "terminate");
1221 
1222 /*
1223  - XML_NODE_IN_CLUSTER ::= true|false
1224  - XML_NODE_IS_PEER ::= online|offline
1225  - XML_NODE_JOIN_STATE ::= member|down|pending|banned
1226  - XML_NODE_EXPECTED ::= member|down
1227 */
1228 
1229  if (crm_is_true(terminate)) {
1230  do_terminate = TRUE;
1231 
1232  } else if (terminate != NULL && strlen(terminate) > 0) {
1233  /* could be a time() value */
1234  char t = terminate[0];
1235 
1236  if (t != '0' && isdigit(t)) {
1237  do_terminate = TRUE;
1238  }
1239  }
1240 
1241  crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
1242  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1243  crm_str(join), crm_str(exp_state), do_terminate);
1244 
1245  online = crm_is_true(in_cluster);
1246  crmd_online = safe_str_eq(is_peer, ONLINESTATUS);
1247  if (exp_state == NULL) {
1248  exp_state = CRMD_JOINSTATE_DOWN;
1249  }
1250 
1251  if (this_node->details->shutdown) {
1252  crm_debug("%s is shutting down", this_node->details->uname);
1253 
1254  /* Slightly different criteria since we can't shut down a dead peer */
1255  online = crmd_online;
1256 
1257  } else if (in_cluster == NULL) {
1258  pe_fence_node(data_set, this_node, "peer has not been seen by the cluster");
1259 
1260  } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
1261  pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria");
1262 
1263  } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
1264 
1265  if (crm_is_true(in_cluster) || crmd_online) {
1266  crm_info("- Node %s is not ready to run resources", this_node->details->uname);
1267  this_node->details->standby = TRUE;
1268  this_node->details->pending = TRUE;
1269 
1270  } else {
1271  crm_trace("%s is down or still coming up", this_node->details->uname);
1272  }
1273 
1274  } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
1275  && crm_is_true(in_cluster) == FALSE && !crmd_online) {
1276  crm_info("Node %s was just shot", this_node->details->uname);
1277  online = FALSE;
1278 
1279  } else if (crm_is_true(in_cluster) == FALSE) {
1280  pe_fence_node(data_set, this_node, "peer is no longer part of the cluster");
1281 
1282  } else if (!crmd_online) {
1283  pe_fence_node(data_set, this_node, "peer process is no longer available");
1284 
1285  /* Everything is running at this point, now check join state */
1286  } else if (do_terminate) {
1287  pe_fence_node(data_set, this_node, "termination was requested");
1288 
1289  } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1290  crm_info("Node %s is active", this_node->details->uname);
1291 
1292  } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
1293  || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
1294  crm_info("Node %s is not ready to run resources", this_node->details->uname);
1295  this_node->details->standby = TRUE;
1296  this_node->details->pending = TRUE;
1297 
1298  } else {
1299  pe_fence_node(data_set, this_node, "peer was in an unknown state");
1300  crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
1301  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1302  crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
1303  }
1304 
1305  return online;
1306 }
1307 
1308 static gboolean
1309 determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
1310 {
1311  resource_t *rsc = this_node->details->remote_rsc;
1312  resource_t *container = NULL;
1313  pe_node_t *host = NULL;
1314 
1315  /* If there is a node state entry for a (former) Pacemaker Remote node
1316  * but no resource creating that node, the node's connection resource will
1317  * be NULL. Consider it an offline remote node in that case.
1318  */
1319  if (rsc == NULL) {
1320  this_node->details->online = FALSE;
1321  goto remote_online_done;
1322  }
1323 
1324  container = rsc->container;
1325 
1326  if (container && (g_list_length(rsc->running_on) == 1)) {
1327  host = rsc->running_on->data;
1328  }
1329 
1330  /* If the resource is currently started, mark it online. */
1331  if (rsc->role == RSC_ROLE_STARTED) {
1332  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1333  (container? "Guest" : "Remote"), this_node->details->id);
1334  this_node->details->online = TRUE;
1335  }
1336 
1337  /* consider this node shutting down if transitioning start->stop */
1338  if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
1339  crm_trace("%s node %s shutting down because connection resource is stopping",
1340  (container? "Guest" : "Remote"), this_node->details->id);
1341  this_node->details->shutdown = TRUE;
1342  }
1343 
1344  /* Now check all the failure conditions. */
1345  if(container && is_set(container->flags, pe_rsc_failed)) {
1346  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1347  this_node->details->id);
1348  this_node->details->online = FALSE;
1349  this_node->details->remote_requires_reset = TRUE;
1350 
1351  } else if(is_set(rsc->flags, pe_rsc_failed)) {
1352  crm_trace("%s node %s OFFLINE because connection resource failed",
1353  (container? "Guest" : "Remote"), this_node->details->id);
1354  this_node->details->online = FALSE;
1355 
1356  } else if (rsc->role == RSC_ROLE_STOPPED
1357  || (container && container->role == RSC_ROLE_STOPPED)) {
1358 
1359  crm_trace("%s node %s OFFLINE because its resource is stopped",
1360  (container? "Guest" : "Remote"), this_node->details->id);
1361  this_node->details->online = FALSE;
1362  this_node->details->remote_requires_reset = FALSE;
1363 
1364  } else if (host && (host->details->online == FALSE)
1365  && host->details->unclean) {
1366  crm_trace("Guest node %s UNCLEAN because host is unclean",
1367  this_node->details->id);
1368  this_node->details->online = FALSE;
1369  this_node->details->remote_requires_reset = TRUE;
1370  }
1371 
1372 remote_online_done:
1373  crm_trace("Remote node %s online=%s",
1374  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1375  return this_node->details->online;
1376 }
1377 
1378 gboolean
1379 determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
1380 {
1381  gboolean online = FALSE;
1382  const char *shutdown = NULL;
1383  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1384 
1385  if (this_node == NULL) {
1386  crm_config_err("No node to check");
1387  return online;
1388  }
1389 
1390  this_node->details->shutdown = FALSE;
1391  this_node->details->expected_up = FALSE;
1392  shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
1393 
1394  if (shutdown != NULL && safe_str_neq("0", shutdown)) {
1395  this_node->details->shutdown = TRUE;
1396 
1397  } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
1398  this_node->details->expected_up = TRUE;
1399  }
1400 
1401  if (this_node->details->type == node_ping) {
1402  this_node->details->unclean = FALSE;
1403  online = FALSE; /* As far as resource management is concerned,
1404  * the node is safely offline.
1405  * Anyone caught abusing this logic will be shot
1406  */
1407 
1408  } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
1409  online = determine_online_status_no_fencing(data_set, node_state, this_node);
1410 
1411  } else {
1412  online = determine_online_status_fencing(data_set, node_state, this_node);
1413  }
1414 
1415  if (online) {
1416  this_node->details->online = TRUE;
1417 
1418  } else {
1419  /* remove node from contention */
1420  this_node->fixed = TRUE;
1421  this_node->weight = -INFINITY;
1422  }
1423 
1424  if (online && this_node->details->shutdown) {
1425  /* don't run resources here */
1426  this_node->fixed = TRUE;
1427  this_node->weight = -INFINITY;
1428  }
1429 
1430  if (this_node->details->type == node_ping) {
1431  crm_info("Node %s is not a pacemaker node", this_node->details->uname);
1432 
1433  } else if (this_node->details->unclean) {
1434  pe_proc_warn("Node %s is unclean", this_node->details->uname);
1435 
1436  } else if (this_node->details->online) {
1437  crm_info("Node %s is %s", this_node->details->uname,
1438  this_node->details->shutdown ? "shutting down" :
1439  this_node->details->pending ? "pending" :
1440  this_node->details->standby ? "standby" :
1441  this_node->details->maintenance ? "maintenance" : "online");
1442 
1443  } else {
1444  crm_trace("Node %s is offline", this_node->details->uname);
1445  }
1446 
1447  return online;
1448 }
1449 
const char *
pe_base_name_end(const char *id)
{
    /* Return a pointer to the last character of the clone base name within
     * id: for "name:0" that is the 'e' before the colon, for "name" the last
     * character, and for a trailing bare ':' the colon itself. Returns NULL
     * when id is NULL or empty.
     */
    if (crm_strlen_zero(id)) {
        return NULL;
    }

    const char *last = id + strlen(id) - 1;

    /* Scan backwards over the (possible) instance-number suffix */
    for (const char *s = last; s > id; --s) {
        if (*s == ':') {
            /* A trailing colon belongs to the base name; otherwise the base
             * name ends just before the colon */
            return (s == last)? s : (s - 1);
        }
        if (!isdigit((unsigned char) *s)) {
            /* Not a ":<digits>" suffix after all */
            return last;
        }
    }
    return last;
}
1487 
char *
clone_strip(const char *last_rsc_id)
{
    /* Return a newly-allocated copy of last_rsc_id with any clone instance
     * suffix (":<n>") removed. Caller owns (and must free) the result.
     * Asserts on NULL/empty input or allocation failure.
     */
    const char *end = pe_base_name_end(last_rsc_id);

    CRM_ASSERT(end);

    char *base = strndup(last_rsc_id, (size_t) (end - last_rsc_id) + 1);

    CRM_ASSERT(base);
    return base;
}
1509 
char *
clone_zero(const char *last_rsc_id)
{
    /* Return a newly-allocated string consisting of last_rsc_id's clone base
     * name with ":0" appended (e.g. "rsc:3" -> "rsc:0"). Caller owns (and
     * must free) the result. Asserts on NULL/empty input or allocation
     * failure.
     *
     * Fix: the original computed (end - last_rsc_id) before CRM_ASSERT(end),
     * performing pointer arithmetic on NULL when pe_base_name_end() fails --
     * undefined behavior. Assert first, then compute.
     */
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    CRM_ASSERT(end);
    base_name_len = (size_t) (end - last_rsc_id) + 1;

    /* base name + ':' + '0' + terminating NUL (calloc zero-fills) */
    zero = calloc(base_name_len + 3, sizeof(char));
    CRM_ASSERT(zero);
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}
1535 
1536 static resource_t *
1537 create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
1538 {
1539  resource_t *rsc = NULL;
1540  xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
1541 
1542  copy_in_properties(xml_rsc, rsc_entry);
1543  crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
1544  crm_log_xml_debug(xml_rsc, "Orphan resource");
1545 
1546  if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
1547  return NULL;
1548  }
1549 
1550  if (xml_contains_remote_node(xml_rsc)) {
1551  node_t *node;
1552 
1553  crm_debug("Detected orphaned remote node %s", rsc_id);
1554  node = pe_find_node(data_set->nodes, rsc_id);
1555  if (node == NULL) {
1556  node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
1557  }
1558  link_rsc2remotenode(data_set, rsc);
1559 
1560  if (node) {
1561  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1562  node->details->shutdown = TRUE;
1563  }
1564  }
1565 
1566  if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
1567  /* This orphaned rsc needs to be mapped to a container. */
1568  crm_trace("Detected orphaned container filler %s", rsc_id);
1570  }
1571  set_bit(rsc->flags, pe_rsc_orphan);
1572  data_set->resources = g_list_append(data_set->resources, rsc);
1573  return rsc;
1574 }
1575 
1580 static pe_resource_t *
1581 create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
1582  pe_node_t *node, pe_working_set_t *data_set)
1583 {
1584  pe_resource_t *top = pe__create_clone_child(parent, data_set);
1585 
1586  // find_rsc() because we might be a cloned group
1587  pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
1588 
1589  pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1590  top->id, parent->id, rsc_id, node->details->uname);
1591  return orphan;
1592 }
1593 
1608 static resource_t *
1609 find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
1610  const char *rsc_id)
1611 {
1612  GListPtr rIter = NULL;
1613  pe_resource_t *rsc = NULL;
1614  pe_resource_t *inactive_instance = NULL;
1615  gboolean skip_inactive = FALSE;
1616 
1617  CRM_ASSERT(parent != NULL);
1618  CRM_ASSERT(pe_rsc_is_clone(parent));
1619  CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
1620 
1621  // Check for active (or partially active, for cloned groups) instance
1622  pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
1623  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
1624  GListPtr locations = NULL;
1625  resource_t *child = rIter->data;
1626 
1627  /* Check whether this instance is already known to be active or pending
1628  * anywhere, at this stage of unpacking. Because this function is called
1629  * for a resource before the resource's individual operation history
1630  * entries are unpacked, locations will generally not contain the
1631  * desired node.
1632  *
1633  * However, there are three exceptions:
1634  * (1) when child is a cloned group and we have already unpacked the
1635  * history of another member of the group on the same node;
1636  * (2) when we've already unpacked the history of another numbered
1637  * instance on the same node (which can happen if globally-unique
1638  * was flipped from true to false); and
1639  * (3) when we re-run calculations on the same data set as part of a
1640  * simulation.
1641  */
1642  child->fns->location(child, &locations, 2);
1643  if (locations) {
1644  /* We should never associate the same numbered anonymous clone
1645  * instance with multiple nodes, and clone instances can't migrate,
1646  * so there must be only one location, regardless of history.
1647  */
1648  CRM_LOG_ASSERT(locations->next == NULL);
1649 
1650  if (((pe_node_t *)locations->data)->details == node->details) {
1651  /* This child instance is active on the requested node, so check
1652  * for a corresponding configured resource. We use find_rsc()
1653  * instead of child because child may be a cloned group, and we
1654  * need the particular member corresponding to rsc_id.
1655  *
1656  * If the history entry is orphaned, rsc will be NULL.
1657  */
1658  rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
1659  if (rsc) {
1660  /* If there are multiple instance history entries for an
1661  * anonymous clone in a single node's history (which can
1662  * happen if globally-unique is switched from true to
1663  * false), we want to consider the instances beyond the
1664  * first as orphans, even if there are inactive instance
1665  * numbers available.
1666  */
1667  if (rsc->running_on) {
1668  crm_notice("Active (now-)anonymous clone %s has "
1669  "multiple (orphan) instance histories on %s",
1670  parent->id, node->details->uname);
1671  skip_inactive = TRUE;
1672  rsc = NULL;
1673  } else {
1674  pe_rsc_trace(parent, "Resource %s, active", rsc->id);
1675  }
1676  }
1677  }
1678  g_list_free(locations);
1679 
1680  } else {
1681  pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
1682  if (!skip_inactive && !inactive_instance
1683  && is_not_set(child->flags, pe_rsc_block)) {
1684  // Remember one inactive instance in case we don't find active
1685  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
1686  pe_find_clone);
1687 
1688  /* ... but don't use it if it was already associated with a
1689  * pending action on another node
1690  */
1691  if (inactive_instance && inactive_instance->pending_node
1692  && (inactive_instance->pending_node->details != node->details)) {
1693  inactive_instance = NULL;
1694  }
1695  }
1696  }
1697  }
1698 
1699  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
1700  pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
1701  rsc = inactive_instance;
1702  }
1703 
1704  /* If the resource has "requires" set to "quorum" or "nothing", and we don't
1705  * have a clone instance for every node, we don't want to consume a valid
1706  * instance number for unclean nodes. Such instances may appear to be active
1707  * according to the history, but should be considered inactive, so we can
1708  * start an instance elsewhere. Treat such instances as orphans.
1709  *
1710  * An exception is instances running on guest nodes -- since guest node
1711  * "fencing" is actually just a resource stop, requires shouldn't apply.
1712  *
1713  * @TODO Ideally, we'd use an inactive instance number if it is not needed
1714  * for any clean instances. However, we don't know that at this point.
1715  */
1716  if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing)
1717  && (!node->details->online || node->details->unclean)
1718  && !is_container_remote_node(node)
1719  && !pe__is_universal_clone(parent, data_set)) {
1720 
1721  rsc = NULL;
1722  }
1723 
1724  if (rsc == NULL) {
1725  rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
1726  pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
1727  }
1728  return rsc;
1729 }
1730 
1731 static resource_t *
1732 unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
1733  xmlNode * rsc_entry)
1734 {
1735  resource_t *rsc = NULL;
1736  resource_t *parent = NULL;
1737 
1738  crm_trace("looking for %s", rsc_id);
1739  rsc = pe_find_resource(data_set->resources, rsc_id);
1740 
1741  if (rsc == NULL) {
1742  /* If we didn't find the resource by its name in the operation history,
1743  * check it again as a clone instance. Even when clone-max=0, we create
1744  * a single :0 orphan to match against here.
1745  */
1746  char *clone0_id = clone_zero(rsc_id);
1747  resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
1748 
1749  if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
1750  rsc = clone0;
1751  parent = uber_parent(clone0);
1752  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
1753  } else {
1754  crm_trace("%s is not known as %s either (orphan)",
1755  rsc_id, clone0_id);
1756  }
1757  free(clone0_id);
1758 
1759  } else if (rsc->variant > pe_native) {
1760  crm_trace("Resource history for %s is orphaned because it is no longer primitive",
1761  rsc_id);
1762  return NULL;
1763 
1764  } else {
1765  parent = uber_parent(rsc);
1766  }
1767 
1768  if (pe_rsc_is_anon_clone(parent)) {
1769 
1770  if (pe_rsc_is_bundled(parent)) {
1771  rsc = find_container_child(parent->parent, node);
1772  } else {
1773  char *base = clone_strip(rsc_id);
1774 
1775  rsc = find_anonymous_clone(data_set, node, parent, base);
1776  free(base);
1777  CRM_ASSERT(rsc != NULL);
1778  }
1779  }
1780 
1781  if (rsc && safe_str_neq(rsc_id, rsc->id)
1782  && safe_str_neq(rsc_id, rsc->clone_name)) {
1783 
1784  free(rsc->clone_name);
1785  rsc->clone_name = strdup(rsc_id);
1786  pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
1787  rsc_id, node->details->uname, rsc->id,
1788  (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
1789  }
1790  return rsc;
1791 }
1792 
1793 static resource_t *
1794 process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
1795 {
1796  resource_t *rsc = NULL;
1797  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
1798 
1799  crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
1800  rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
1801 
1802  if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
1804 
1805  } else {
1806  print_resource(LOG_TRACE, "Added orphan", rsc, FALSE);
1807 
1808  CRM_CHECK(rsc != NULL, return NULL);
1809  resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
1810  }
1811  return rsc;
1812 }
1813 
1814 static void
1815 process_rsc_state(resource_t * rsc, node_t * node,
1816  enum action_fail_response on_fail,
1817  xmlNode * migrate_op, pe_working_set_t * data_set)
1818 {
1819  node_t *tmpnode = NULL;
1820  char *reason = NULL;
1821 
1822  CRM_ASSERT(rsc);
1823  pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
1824  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
1825 
1826  /* process current state */
1827  if (rsc->role != RSC_ROLE_UNKNOWN) {
1828  resource_t *iter = rsc;
1829 
1830  while (iter) {
1831  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
1832  node_t *n = node_copy(node);
1833 
1834  pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
1835  n->details->uname);
1836  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
1837  }
1838  if (is_set(iter->flags, pe_rsc_unique)) {
1839  break;
1840  }
1841  iter = iter->parent;
1842  }
1843  }
1844 
1845  /* If a managed resource is believed to be running, but node is down ... */
1846  if (rsc->role > RSC_ROLE_STOPPED
1847  && node->details->online == FALSE
1848  && node->details->maintenance == FALSE
1849  && is_set(rsc->flags, pe_rsc_managed)) {
1850 
1851  gboolean should_fence = FALSE;
1852 
1853  /* If this is a guest node, fence it (regardless of whether fencing is
1854  * enabled, because guest node fencing is done by recovery of the
1855  * container resource rather than by the fencer). Mark the resource
1856  * we're processing as failed. When the guest comes back up, its
1857  * operation history in the CIB will be cleared, freeing the affected
1858  * resource to run again once we are sure we know its state.
1859  */
1860  if (is_container_remote_node(node)) {
1861  set_bit(rsc->flags, pe_rsc_failed);
1862  should_fence = TRUE;
1863 
1864  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1865  if (is_baremetal_remote_node(node) && node->details->remote_rsc
1866  && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
1867 
1868  /* setting unseen = true means that fencing of the remote node will
1869  * only occur if the connection resource is not going to start somewhere.
1870  * This allows connection resources on a failed cluster-node to move to
1871  * another node without requiring the baremetal remote nodes to be fenced
1872  * as well. */
1873  node->details->unseen = TRUE;
1874  reason = crm_strdup_printf("%s is active there (fencing will be"
1875  " revoked if remote connection can "
1876  "be re-established elsewhere)",
1877  rsc->id);
1878  }
1879  should_fence = TRUE;
1880  }
1881 
1882  if (should_fence) {
1883  if (reason == NULL) {
1884  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
1885  }
1886  pe_fence_node(data_set, node, reason);
1887  }
1888  free(reason);
1889  }
1890 
1891  if (node->details->unclean) {
1892  /* No extra processing needed
1893  * Also allows resources to be started again after a node is shot
1894  */
1895  on_fail = action_fail_ignore;
1896  }
1897 
1898  switch (on_fail) {
1899  case action_fail_ignore:
1900  /* nothing to do */
1901  break;
1902 
1903  case action_fail_fence:
1904  /* treat it as if it is still running
1905  * but also mark the node as unclean
1906  */
1907  reason = crm_strdup_printf("%s failed there", rsc->id);
1908  pe_fence_node(data_set, node, reason);
1909  free(reason);
1910  break;
1911 
1912  case action_fail_standby:
1913  node->details->standby = TRUE;
1914  node->details->standby_onfail = TRUE;
1915  break;
1916 
1917  case action_fail_block:
1918  /* is_managed == FALSE will prevent any
1919  * actions being sent for the resource
1920  */
1922  set_bit(rsc->flags, pe_rsc_block);
1923  break;
1924 
1925  case action_fail_migrate:
1926  /* make sure it comes up somewhere else
1927  * or not at all
1928  */
1929  resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
1930  break;
1931 
1932  case action_fail_stop:
1933  rsc->next_role = RSC_ROLE_STOPPED;
1934  break;
1935 
1936  case action_fail_recover:
1937  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
1938  set_bit(rsc->flags, pe_rsc_failed);
1939  stop_action(rsc, node, FALSE);
1940  }
1941  break;
1942 
1944  set_bit(rsc->flags, pe_rsc_failed);
1945 
1946  if (rsc->container && pe_rsc_is_bundled(rsc)) {
1947  /* A bundle's remote connection can run on a different node than
1948  * the bundle's container. We don't necessarily know where the
1949  * container is running yet, so remember it and add a stop
1950  * action for it later.
1951  */
1952  data_set->stop_needed = g_list_prepend(data_set->stop_needed,
1953  rsc->container);
1954  } else if (rsc->container) {
1955  stop_action(rsc->container, node, FALSE);
1956  } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
1957  stop_action(rsc, node, FALSE);
1958  }
1959  break;
1960 
1962  set_bit(rsc->flags, pe_rsc_failed);
1963  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1964  tmpnode = NULL;
1965  if (rsc->is_remote_node) {
1966  tmpnode = pe_find_node(data_set->nodes, rsc->id);
1967  }
1968  if (tmpnode &&
1969  is_baremetal_remote_node(tmpnode) &&
1970  tmpnode->details->remote_was_fenced == 0) {
1971 
1972  /* connection resource to baremetal resource failed in a way that
1973  * should result in fencing the remote-node. */
1974  pe_fence_node(data_set, tmpnode,
1975  "remote connection is unrecoverable");
1976  }
1977  }
1978 
1979  /* require the stop action regardless if fencing is occurring or not. */
1980  if (rsc->role > RSC_ROLE_STOPPED) {
1981  stop_action(rsc, node, FALSE);
1982  }
1983 
1984  /* if reconnect delay is in use, prevent the connection from exiting the
1985  * "STOPPED" role until the failure is cleared by the delay timeout. */
1986  if (rsc->remote_reconnect_ms) {
1987  rsc->next_role = RSC_ROLE_STOPPED;
1988  }
1989  break;
1990  }
1991 
1992  /* ensure a remote-node connection failure forces an unclean remote-node
1993  * to be fenced. By setting unseen = FALSE, the remote-node failure will
1994  * result in a fencing operation regardless if we're going to attempt to
1995  * reconnect to the remote-node in this transition or not. */
1996  if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
1997  tmpnode = pe_find_node(data_set->nodes, rsc->id);
1998  if (tmpnode && tmpnode->details->unclean) {
1999  tmpnode->details->unseen = FALSE;
2000  }
2001  }
2002 
2003  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2004  if (is_set(rsc->flags, pe_rsc_orphan)) {
2005  if (is_set(rsc->flags, pe_rsc_managed)) {
2006  crm_config_warn("Detected active orphan %s running on %s",
2007  rsc->id, node->details->uname);
2008  } else {
2009  crm_config_warn("Cluster configured not to stop active orphans."
2010  " %s must be stopped manually on %s",
2011  rsc->id, node->details->uname);
2012  }
2013  }
2014 
2015  native_add_running(rsc, node, data_set);
2016  if (on_fail != action_fail_ignore) {
2017  set_bit(rsc->flags, pe_rsc_failed);
2018  }
2019 
2020  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2021  /* Only do this for older status sections that included instance numbers
2022  * Otherwise stopped instances will appear as orphans
2023  */
2024  pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2025  free(rsc->clone_name);
2026  rsc->clone_name = NULL;
2027 
2028  } else {
2029  char *key = stop_key(rsc);
2030  GListPtr possible_matches = find_actions(rsc->actions, key, node);
2031  GListPtr gIter = possible_matches;
2032 
2033  for (; gIter != NULL; gIter = gIter->next) {
2034  action_t *stop = (action_t *) gIter->data;
2035 
2036  stop->flags |= pe_action_optional;
2037  }
2038 
2039  g_list_free(possible_matches);
2040  free(key);
2041  }
2042 }
2043 
2044 /* create active recurring operations as optional */
2045 static void
2046 process_recurring(node_t * node, resource_t * rsc,
2047  int start_index, int stop_index,
2048  GListPtr sorted_op_list, pe_working_set_t * data_set)
2049 {
2050  int counter = -1;
2051  const char *task = NULL;
2052  const char *status = NULL;
2053  GListPtr gIter = sorted_op_list;
2054 
2055  CRM_ASSERT(rsc);
2056  pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2057 
2058  for (; gIter != NULL; gIter = gIter->next) {
2059  xmlNode *rsc_op = (xmlNode *) gIter->data;
2060 
2061  guint interval_ms = 0;
2062  char *key = NULL;
2063  const char *id = ID(rsc_op);
2064  const char *interval_ms_s = NULL;
2065 
2066  counter++;
2067 
2068  if (node->details->online == FALSE) {
2069  pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
2070  break;
2071 
2072  /* Need to check if there's a monitor for role="Stopped" */
2073  } else if (start_index < stop_index && counter <= stop_index) {
2074  pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
2075  continue;
2076 
2077  } else if (counter < start_index) {
2078  pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
2079  continue;
2080  }
2081 
2082  interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS);
2083  interval_ms = crm_parse_ms(interval_ms_s);
2084  if (interval_ms == 0) {
2085  pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
2086  continue;
2087  }
2088 
2089  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2090  if (safe_str_eq(status, "-1")) {
2091  pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
2092  continue;
2093  }
2094  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2095  /* create the action */
2096  key = generate_op_key(rsc->id, task, interval_ms);
2097  pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
2098  custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
2099  }
2100 }
2101 
2102 void
2103 calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
2104 {
2105  int counter = -1;
2106  int implied_monitor_start = -1;
2107  int implied_clone_start = -1;
2108  const char *task = NULL;
2109  const char *status = NULL;
2110  GListPtr gIter = sorted_op_list;
2111 
2112  *stop_index = -1;
2113  *start_index = -1;
2114 
2115  for (; gIter != NULL; gIter = gIter->next) {
2116  xmlNode *rsc_op = (xmlNode *) gIter->data;
2117 
2118  counter++;
2119 
2120  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2121  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2122 
2123  if (safe_str_eq(task, CRMD_ACTION_STOP)
2124  && safe_str_eq(status, "0")) {
2125  *stop_index = counter;
2126 
2127  } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2128  *start_index = counter;
2129 
2130  } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2131  const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2132 
2133  if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
2134  implied_monitor_start = counter;
2135  }
2136  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
2137  implied_clone_start = counter;
2138  }
2139  }
2140 
2141  if (*start_index == -1) {
2142  if (implied_clone_start != -1) {
2143  *start_index = implied_clone_start;
2144  } else if (implied_monitor_start != -1) {
2145  *start_index = implied_monitor_start;
2146  }
2147  }
2148 }
2149 
2150 static resource_t *
2151 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
2152 {
2153  GListPtr gIter = NULL;
2154  int stop_index = -1;
2155  int start_index = -1;
2156  enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
2157 
2158  const char *task = NULL;
2159  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2160 
2161  resource_t *rsc = NULL;
2162  GListPtr op_list = NULL;
2163  GListPtr sorted_op_list = NULL;
2164 
2165  xmlNode *migrate_op = NULL;
2166  xmlNode *rsc_op = NULL;
2167  xmlNode *last_failure = NULL;
2168 
2169  enum action_fail_response on_fail = FALSE;
2170  enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
2171 
2172  crm_trace("[%s] Processing %s on %s",
2173  crm_element_name(rsc_entry), rsc_id, node->details->uname);
2174 
2175  /* extract operations */
2176  op_list = NULL;
2177  sorted_op_list = NULL;
2178 
2179  for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
2180  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
2181  op_list = g_list_prepend(op_list, rsc_op);
2182  }
2183  }
2184 
2185  if (op_list == NULL) {
2186  /* if there are no operations, there is nothing to do */
2187  return NULL;
2188  }
2189 
2190  /* find the resource */
2191  rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
2192  if (rsc == NULL) {
2193  rsc = process_orphan_resource(rsc_entry, node, data_set);
2194  }
2195  CRM_ASSERT(rsc != NULL);
2196 
2197  /* process operations */
2198  saved_role = rsc->role;
2199  on_fail = action_fail_ignore;
2200  rsc->role = RSC_ROLE_UNKNOWN;
2201  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2202 
2203  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2204  xmlNode *rsc_op = (xmlNode *) gIter->data;
2205 
2206  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2207  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2208  migrate_op = rsc_op;
2209  }
2210 
2211  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
2212  }
2213 
2214  /* create active recurring operations as optional */
2215  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2216  process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
2217 
2218  /* no need to free the contents */
2219  g_list_free(sorted_op_list);
2220 
2221  process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
2222 
2223  if (get_target_role(rsc, &req_role)) {
2224  if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
2225  pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
2226  " with requested next role %s",
2227  rsc->id, role2text(rsc->next_role), role2text(req_role));
2228  rsc->next_role = req_role;
2229 
2230  } else if (req_role > rsc->next_role) {
2231  pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2232  " with requested next role %s",
2233  rsc->id, role2text(rsc->next_role), role2text(req_role));
2234  }
2235  }
2236 
2237  if (saved_role > rsc->role) {
2238  rsc->role = saved_role;
2239  }
2240 
2241  return rsc;
2242 }
2243 
2244 static void
2245 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2246 {
2247  xmlNode *rsc_entry = NULL;
2248  for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
2249  rsc_entry = __xml_next_element(rsc_entry)) {
2250 
2251  resource_t *rsc;
2252  resource_t *container;
2253  const char *rsc_id;
2254  const char *container_id;
2255 
2256  if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
2257  continue;
2258  }
2259 
2260  container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2261  rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2262  if (container_id == NULL || rsc_id == NULL) {
2263  continue;
2264  }
2265 
2266  container = pe_find_resource(data_set->resources, container_id);
2267  if (container == NULL) {
2268  continue;
2269  }
2270 
2271  rsc = pe_find_resource(data_set->resources, rsc_id);
2272  if (rsc == NULL ||
2273  is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
2274  rsc->container != NULL) {
2275  continue;
2276  }
2277 
2278  pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2279  rsc->id, container_id);
2280  rsc->container = container;
2281  container->fillers = g_list_append(container->fillers, rsc);
2282  }
2283 }
2284 
2285 gboolean
2286 unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2287 {
2288  xmlNode *rsc_entry = NULL;
2289  gboolean found_orphaned_container_filler = FALSE;
2290 
2291  CRM_CHECK(node != NULL, return FALSE);
2292 
2293  crm_trace("Unpacking resources on %s", node->details->uname);
2294 
2295  for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
2296  rsc_entry = __xml_next_element(rsc_entry)) {
2297 
2298  if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
2299  resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
2300  if (!rsc) {
2301  continue;
2302  }
2303  if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
2304  found_orphaned_container_filler = TRUE;
2305  }
2306  }
2307  }
2308 
2309  /* now that all the resource state has been unpacked for this node
2310  * we have to go back and map any orphaned container fillers to their
2311  * container resource */
2312  if (found_orphaned_container_filler) {
2313  handle_orphaned_container_fillers(lrm_rsc_list, data_set);
2314  }
2315  return TRUE;
2316 }
2317 
2318 static void
2319 set_active(resource_t * rsc)
2320 {
2321  resource_t *top = uber_parent(rsc);
2322 
2323  if (top && is_set(top->flags, pe_rsc_promotable)) {
2324  rsc->role = RSC_ROLE_SLAVE;
2325  } else {
2326  rsc->role = RSC_ROLE_STARTED;
2327  }
2328 }
2329 
2330 static void
2331 set_node_score(gpointer key, gpointer value, gpointer user_data)
2332 {
2333  node_t *node = value;
2334  int *score = user_data;
2335 
2336  node->weight = *score;
2337 }
2338 
2339 #define STATUS_PATH_MAX 1024
2340 static xmlNode *
2341 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2342  pe_working_set_t * data_set)
2343 {
2344  int offset = 0;
2345  char xpath[STATUS_PATH_MAX];
2346 
2347  offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
2348  offset +=
2349  snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
2350  resource);
2351 
2352  /* Need to check against transition_magic too? */
2353  if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
2354  offset +=
2355  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2356  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
2357  source);
2358  } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
2359  offset +=
2360  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2361  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
2362  source);
2363  } else {
2364  offset +=
2365  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2366  "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
2367  }
2368 
2369  CRM_LOG_ASSERT(offset > 0);
2370  return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
2371 }
2372 
2373 static bool
2374 stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2375  pe_working_set_t *data_set)
2376 {
2377  xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id,
2378  NULL, data_set);
2379 
2380  if (stop_op) {
2381  int stop_id = 0;
2382  int task_id = 0;
2383 
2384  crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
2385  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
2386  if (stop_id > task_id) {
2387  return TRUE;
2388  }
2389  }
2390  return FALSE;
2391 }
2392 
2393 static void
2394 unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
2395 {
2396  /* A successful migration sequence is:
2397  * migrate_to on source node
2398  * migrate_from on target node
2399  * stop on source node
2400  *
2401  * If a migrate_to is followed by a stop, the entire migration (successful
2402  * or failed) is complete, and we don't care what happened on the target.
2403  *
2404  * If no migrate_from has happened, the migration is considered to be
2405  * "partial". If the migrate_from failed, make sure the resource gets
2406  * stopped on both source and target (if up).
2407  *
2408  * If the migrate_to and migrate_from both succeeded (which also implies the
2409  * resource is no longer running on the source), but there is no stop, the
2410  * migration is considered to be "dangling".
2411  */
2412  int from_rc = 0;
2413  int from_status = 0;
2414  const char *migrate_source = NULL;
2415  const char *migrate_target = NULL;
2416  pe_node_t *target = NULL;
2417  pe_node_t *source = NULL;
2418  xmlNode *migrate_from = NULL;
2419 
2420  if (stop_happened_after(rsc, node, xml_op, data_set)) {
2421  return;
2422  }
2423 
2424  // Clones are not allowed to migrate, so role can't be master
2425  rsc->role = RSC_ROLE_STARTED;
2426 
2427  migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2428  migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2429 
2430  target = pe_find_node(data_set->nodes, migrate_target);
2431  source = pe_find_node(data_set->nodes, migrate_source);
2432 
2433  // Check whether there was a migrate_from action
2434  migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target,
2435  migrate_source, data_set);
2436  if (migrate_from) {
2437  crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
2438  crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
2439  pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
2440  ID(migrate_from), migrate_target, from_status, from_rc);
2441  }
2442 
2443  if (migrate_from && from_rc == PCMK_OCF_OK
2444  && from_status == PCMK_LRM_OP_DONE) {
2445  /* The migrate_to and migrate_from both succeeded, so mark the migration
2446  * as "dangling". This will be used to schedule a stop action on the
2447  * source without affecting the target.
2448  */
2449  pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
2450  migrate_source);
2451  rsc->role = RSC_ROLE_STOPPED;
2452  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2453 
2454  } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
2455  if (target && target->details->online) {
2456  pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
2457  target->details->online);
2458  native_add_running(rsc, target, data_set);
2459  }
2460 
2461  } else { // Pending, or complete but erased
2462  if (target && target->details->online) {
2463  pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
2464  target->details->online);
2465 
2466  native_add_running(rsc, target, data_set);
2467  if (source && source->details->online) {
2468  /* This is a partial migration: the migrate_to completed
2469  * successfully on the source, but the migrate_from has not
2470  * completed. Remember the source and target; if the newly
2471  * chosen target remains the same when we schedule actions
2472  * later, we may continue with the migration.
2473  */
2474  rsc->partial_migration_target = target;
2475  rsc->partial_migration_source = source;
2476  }
2477  } else {
2478  /* Consider it failed here - forces a restart, prevents migration */
2479  set_bit(rsc->flags, pe_rsc_failed);
2481  }
2482  }
2483 }
2484 
2485 static void
2486 unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
2487 {
2488  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2489 
2490  CRM_ASSERT(rsc);
2491  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2492  int stop_id = 0;
2493  int migrate_id = 0;
2494  const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2495  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2496 
2497  xmlNode *stop_op =
2498  find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
2499  xmlNode *migrate_op =
2500  find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target,
2501  data_set);
2502 
2503  if (stop_op) {
2504  crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
2505  }
2506  if (migrate_op) {
2507  crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
2508  }
2509 
2510  /* Get our state right */
2511  rsc->role = RSC_ROLE_STARTED; /* can be master? */
2512 
2513  if (stop_op == NULL || stop_id < migrate_id) {
2514  node_t *source = pe_find_node(data_set->nodes, migrate_source);
2515 
2516  if (source && source->details->online) {
2517  native_add_running(rsc, source, data_set);
2518  }
2519  }
2520 
2521  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
2522  int stop_id = 0;
2523  int migrate_id = 0;
2524  const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2525  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2526 
2527  xmlNode *stop_op =
2528  find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
2529  xmlNode *migrate_op =
2530  find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
2531  data_set);
2532 
2533  if (stop_op) {
2534  crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
2535  }
2536  if (migrate_op) {
2537  crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
2538  }
2539 
2540  /* Get our state right */
2541  rsc->role = RSC_ROLE_STARTED; /* can be master? */
2542 
2543  if (stop_op == NULL || stop_id < migrate_id) {
2544  node_t *target = pe_find_node(data_set->nodes, migrate_target);
2545 
2546  pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op,
2547  migrate_id);
2548  if (target && target->details->online) {
2549  native_add_running(rsc, target, data_set);
2550  }
2551 
2552  } else if (migrate_op == NULL) {
2553  /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
2554  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2555  }
2556  }
2557 }
2558 
2559 static void
2560 record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set)
2561 {
2562  xmlNode *xIter = NULL;
2563  const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
2564 
2565  if (node->details->online == FALSE) {
2566  return;
2567  }
2568 
2569  for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
2570  const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
2571  const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
2572 
2573  if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
2574  crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
2575  return;
2576  }
2577  }
2578 
2579  crm_trace("Adding entry %s on %s", op_key, node->details->uname);
2580  crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
2581  crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
2582  add_node_copy(data_set->failed, op);
2583 }
2584 
2585 static const char *get_op_key(xmlNode *xml_op)
2586 {
2587  const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
2588  if(key == NULL) {
2589  key = ID(xml_op);
2590  }
2591  return key;
2592 }
2593 
/*!
 * \internal
 * \brief Process one failed operation from a resource's history
 *
 * Records the failure, escalates the caller's on-fail policy according to the
 * failed action's configured on-fail, adjusts the resource's role/next_role
 * based on which action failed, and bans the resource (or its anonymous clone
 * parent) from all nodes when on-fail demands a full stop.
 *
 * \param[in,out] rsc           Resource the operation belongs to
 * \param[in]     node          Node the operation was recorded on
 * \param[in]     rc            Operation's exit code
 * \param[in]     xml_op        Failed operation history entry
 * \param[out]    last_failure  Set to \p xml_op (most recent failure seen)
 * \param[in,out] on_fail       Failure policy, possibly escalated here
 * \param[in,out] data_set      Cluster working set
 */
static void
unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
                      enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
    guint interval_ms = 0;
    bool is_probe = FALSE;
    action_t *action = NULL;

    const char *key = get_op_key(xml_op);
    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);

    CRM_ASSERT(rsc);

    *last_failure = xml_op;

    /* A zero-interval monitor is a probe */
    crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
    if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
        is_probe = TRUE;
        pe_rsc_trace(rsc, "is a probe: %s", key);
    }

    /* "not installed" on an asymmetric cluster is expected noise; only log it
     * at trace level and don't record it as a failure */
    if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
        crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d",
                 (is_probe? "probe" : task), rsc->id, node->details->uname,
                 services_ocf_exitcode_str(rc), rc);

        if (is_probe && (rc != PCMK_OCF_OK)
            && (rc != PCMK_OCF_NOT_RUNNING)
            && (rc != PCMK_OCF_RUNNING_MASTER)) {

            /* A failed (not just unexpected) probe result could mean the user
             * didn't know resources will be probed even where they can't run.
             */
            crm_notice("If it is not possible for %s to run on %s, see "
                       "the resource-discovery option for location constraints",
                       rsc->id, node->details->uname);
        }

        record_failed_op(xml_op, node, rsc, data_set);

    } else {
        crm_trace("Processing failed op %s for %s on %s: %s (%d)",
                  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
                  rc);
    }

    /* Create a transient action to obtain the failed op's configured on-fail
     * and fail-role; freed at the bottom of this function */
    action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);

    /* Escalate the caller's on-fail only when this action's policy takes
     * precedence (fencing outranks everything; reset-remote and
     * restart-container outrank plain recovery) */
    if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
        (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
        (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
        (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
        pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
                     fail2text(action->on_fail), action->uuid, key);
        *on_fail = action->on_fail;
    }

    /* Task-specific consequences of the failure */
    if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        /* A failed stop bans the resource from this node */
        resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);

    } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
        unpack_rsc_migration_failure(rsc, node, xml_op, data_set);

    } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
        /* A failed promote still leaves the resource in master role */
        rsc->role = RSC_ROLE_MASTER;

    } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        if (action->on_fail == action_fail_block) {
            rsc->role = RSC_ROLE_MASTER;
            rsc->next_role = RSC_ROLE_STOPPED;

        } else if(rc == PCMK_OCF_NOT_RUNNING) {
            rsc->role = RSC_ROLE_STOPPED;

        } else {
            /*
             * Staying in master role would put the PE/TE into a loop. Setting
             * slave role is not dangerous because the resource will be stopped
             * as part of recovery, and any master promotion will be ordered
             * after that stop.
             */
            rsc->role = RSC_ROLE_SLAVE;
        }
    }

    if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
        /* leave stopped */
        pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
        rsc->role = RSC_ROLE_STOPPED;

    } else if (rsc->role < RSC_ROLE_STARTED) {
        pe_rsc_trace(rsc, "Setting %s active", rsc->id);
        set_active(rsc);
    }

    pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
                 rsc->id, role2text(rsc->role),
                 node->details->unclean ? "true" : "false",
                 fail2text(action->on_fail), role2text(action->fail_role));

    /* A configured fail-role caps (raises) the next role */
    if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
        rsc->next_role = action->fail_role;
    }

    if (action->fail_role == RSC_ROLE_STOPPED) {
        int score = -INFINITY;

        resource_t *fail_rsc = rsc;

        if (fail_rsc->parent) {
            resource_t *parent = uber_parent(fail_rsc);

            if (pe_rsc_is_clone(parent)
                && is_not_set(parent->flags, pe_rsc_unique)) {
                /* For clone resources, if a child fails on an operation
                 * with on-fail = stop, all the resources fail. Do this by preventing
                 * the parent from coming up again. */
                fail_rsc = parent;
            }
        }
        crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
        /* make sure it doesn't come up again */
        if (fail_rsc->allowed_nodes != NULL) {
            g_hash_table_destroy(fail_rsc->allowed_nodes);
        }
        fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
        g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
    }

    pe_free_action(action);
}
2724 
2725 static int
2726 determine_op_status(
2727  resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
2728 {
2729  guint interval_ms = 0;
2730  int result = PCMK_LRM_OP_DONE;
2731 
2732  const char *key = get_op_key(xml_op);
2733  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2734 
2735  bool is_probe = FALSE;
2736 
2737  CRM_ASSERT(rsc);
2738  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2739  if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2740  is_probe = TRUE;
2741  }
2742 
2743  if (target_rc >= 0 && target_rc != rc) {
2744  result = PCMK_LRM_OP_ERROR;
2745  pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
2746  key, node->details->uname,
2747  services_ocf_exitcode_str(rc), rc,
2748  services_ocf_exitcode_str(target_rc), target_rc);
2749  }
2750 
2751  /* we could clean this up significantly except for old LRMs and CRMs that
2752  * didn't include target_rc and liked to remap status
2753  */
2754  switch (rc) {
2755  case PCMK_OCF_OK:
2756  if (is_probe && target_rc == 7) {
2757  result = PCMK_LRM_OP_DONE;
2758  pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
2759  task, rsc->id, node->details->uname);
2760  }
2761  break;
2762 
2763  case PCMK_OCF_NOT_RUNNING:
2764  if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
2765  result = PCMK_LRM_OP_DONE;
2766  rsc->role = RSC_ROLE_STOPPED;
2767 
2768  /* clear any previous failure actions */
2769  *on_fail = action_fail_ignore;
2770  rsc->next_role = RSC_ROLE_UNKNOWN;
2771 
2772  } else if (safe_str_neq(task, CRMD_ACTION_STOP)) {
2773  result = PCMK_LRM_OP_ERROR;
2774  }
2775  break;
2776 
2778  if (is_probe) {
2779  result = PCMK_LRM_OP_DONE;
2780  pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
2781  task, rsc->id, node->details->uname);
2782 
2783  } else if (target_rc == rc) {
2784  /* nothing to do */
2785 
2786  } else if (target_rc >= 0) {
2787  result = PCMK_LRM_OP_ERROR;
2788  }
2789  rsc->role = RSC_ROLE_MASTER;
2790  break;
2791 
2794  rsc->role = RSC_ROLE_MASTER;
2795  result = PCMK_LRM_OP_ERROR;
2796  break;
2797 
2799  result = PCMK_LRM_OP_ERROR_FATAL;
2800  break;
2801 
2806  if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && (interval_ms > 0)) {
2807  result = PCMK_LRM_OP_NOTSUPPORTED;
2808  break;
2809 
2810  } else if (pe_can_fence(data_set, node) == FALSE
2811  && safe_str_eq(task, CRMD_ACTION_STOP)) {
2812  /* If a stop fails and we can't fence, there's nothing else we can do */
2813  pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
2814  rsc->id, task, services_ocf_exitcode_str(rc), rc);
2816  set_bit(rsc->flags, pe_rsc_block);
2817  }
2818  result = PCMK_LRM_OP_ERROR_HARD;
2819  break;
2820 
2821  default:
2822  if (result == PCMK_LRM_OP_DONE) {
2823  crm_info("Treating %s (rc=%d) on %s as an ERROR",
2824  key, rc, node->details->uname);
2825  result = PCMK_LRM_OP_ERROR;
2826  }
2827  }
2828 
2829  return result;
2830 }
2831 
2832 static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
2833 {
2834  bool expired = FALSE;
2835  time_t last_failure = 0;
2836  guint interval_ms = 0;
2837  int failure_timeout = rsc->failure_timeout;
2838  const char *key = get_op_key(xml_op);
2839  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2840  const char *clear_reason = NULL;
2841 
2842  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2843 
2844  /* clearing recurring monitor operation failures automatically
2845  * needs to be carefully considered */
2846  if ((interval_ms != 0) && safe_str_eq(task, "monitor")) {
2847 
2848  /* TODO, in the future we should consider not clearing recurring monitor
2849  * op failures unless the last action for a resource was a "stop" action.
2850  * otherwise it is possible that clearing the monitor failure will result
2851  * in the resource being in an undeterministic state.
2852  *
2853  * For now we handle this potential undeterministic condition for remote
2854  * node connection resources by not clearing a recurring monitor op failure
2855  * until after the node has been fenced. */
2856 
2857  if (is_set(data_set->flags, pe_flag_stonith_enabled)
2858  && rsc->remote_reconnect_ms) {
2859 
2860  node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
2861  if (remote_node && remote_node->details->remote_was_fenced == 0) {
2862  if (strstr(ID(xml_op), "last_failure")) {
2863  crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
2864  }
2865  /* disabling failure timeout for this operation because we believe
2866  * fencing of the remote node should occur first. */
2867  failure_timeout = 0;
2868  }
2869  }
2870  }
2871 
2872  if (failure_timeout > 0) {
2873  int last_run = 0;
2874 
2875  if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
2876  time_t now = get_effective_time(data_set);
2877 
2878  if (now > (last_run + failure_timeout)) {
2879  expired = TRUE;
2880  }
2881  }
2882  }
2883 
2884  if (expired) {
2885  if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, xml_op,
2886  data_set)) {
2887 
2888  // There is a fail count ignoring timeout
2889 
2890  if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective,
2891  xml_op, data_set) == 0) {
2892  // There is no fail count considering timeout
2893  clear_reason = "it expired";
2894 
2895  } else {
2896  expired = FALSE;
2897  }
2898 
2899  } else if (rsc->remote_reconnect_ms
2900  && strstr(ID(xml_op), "last_failure")) {
2901  // Always clear last failure when reconnect interval is set
2902  clear_reason = "reconnect interval is set";
2903  }
2904 
2905  } else if (strstr(ID(xml_op), "last_failure") &&
2906  ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
2907 
2908  if (container_fix_remote_addr(rsc)) {
2909  /* We haven't allocated resources yet, so we can't reliably
2910  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
2911  * When that's needed, defer the check until later.
2912  */
2913  pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
2914  data_set);
2915 
2916  } else {
2917  op_digest_cache_t *digest_data = NULL;
2918 
2919  digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
2920  switch (digest_data->rc) {
2921  case RSC_DIGEST_UNKNOWN:
2922  crm_trace("Resource %s history entry %s on %s has no digest to compare",
2923  rsc->id, key, node->details->id);
2924  break;
2925  case RSC_DIGEST_MATCH:
2926  break;
2927  default:
2928  clear_reason = "resource parameters have changed";
2929  break;
2930  }
2931  }
2932  }
2933 
2934  if (clear_reason != NULL) {
2935  // Schedule clearing of the fail count
2936  pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
2937  data_set);
2938 
2939  if (is_set(data_set->flags, pe_flag_stonith_enabled)
2940  && rsc->remote_reconnect_ms) {
2941 
2942  pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
2943 
2944  if (remote_node) {
2945  /* If we're clearing a remote connection due to a reconnect
2946  * interval, we want to wait until any scheduled fencing
2947  * completes.
2948  *
2949  * We could limit this to remote_node->details->unclean, but at
2950  * this point, that's always true (it won't be reliable until
2951  * after unpack_node_loop() is done).
2952  */
2953  pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
2954  data_set);
2955 
2956  crm_info("Clearing %s failure will wait until any scheduled "
2957  "fencing of %s completes", task, rsc->id);
2958  order_actions(fence, clear_op, pe_order_implies_then);
2959  }
2960  }
2961  }
2962 
2963  if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2964  switch(rc) {
2965  case PCMK_OCF_OK:
2966  case PCMK_OCF_NOT_RUNNING:
2968  case PCMK_OCF_DEGRADED:
2970  /* Don't expire probes that return these values */
2971  expired = FALSE;
2972  break;
2973  }
2974  }
2975 
2976  return expired;
2977 }
2978 
2979 int get_target_rc(xmlNode *xml_op)
2980 {
2981  int dummy = 0;
2982  int target_rc = 0;
2983  char *dummy_string = NULL;
2984  const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
2985  if (key == NULL) {
2986  return -1;
2987  }
2988 
2989  decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
2990  free(dummy_string);
2991 
2992  return target_rc;
2993 }
2994 
2995 static enum action_fail_response
2996 get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
2997 {
2998  int result = action_fail_recover;
2999  action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
3000 
3001  result = action->on_fail;
3002  pe_free_action(action);
3003 
3004  return result;
3005 }
3006 
3007 static void
3008 update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
3009  xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
3010 {
3011  gboolean clear_past_failure = FALSE;
3012 
3013  CRM_ASSERT(rsc);
3014  CRM_ASSERT(xml_op);
3015 
3016  if (rc == PCMK_OCF_NOT_RUNNING) {
3017  clear_past_failure = TRUE;
3018 
3019  } else if (rc == PCMK_OCF_NOT_INSTALLED) {
3020  rsc->role = RSC_ROLE_STOPPED;
3021 
3022  } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
3023  if (last_failure) {
3024  const char *op_key = get_op_key(xml_op);
3025  const char *last_failure_key = get_op_key(last_failure);
3026 
3027  if (safe_str_eq(op_key, last_failure_key)) {
3028  clear_past_failure = TRUE;
3029  }
3030  }
3031 
3032  if (rsc->role < RSC_ROLE_STARTED) {
3033  set_active(rsc);
3034  }
3035 
3036  } else if (safe_str_eq(task, CRMD_ACTION_START)) {
3037  rsc->role = RSC_ROLE_STARTED;
3038  clear_past_failure = TRUE;
3039 
3040  } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
3041  rsc->role = RSC_ROLE_STOPPED;
3042  clear_past_failure = TRUE;
3043 
3044  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3045  rsc->role = RSC_ROLE_MASTER;
3046  clear_past_failure = TRUE;
3047 
3048  } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
3049  /* Demote from Master does not clear an error */
3050  rsc->role = RSC_ROLE_SLAVE;
3051 
3052  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
3053  rsc->role = RSC_ROLE_STARTED;
3054  clear_past_failure = TRUE;
3055 
3056  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
3057  unpack_rsc_migration(rsc, node, xml_op, data_set);
3058 
3059  } else if (rsc->role < RSC_ROLE_STARTED) {
3060  pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
3061  set_active(rsc);
3062  }
3063 
3064  /* clear any previous failure actions */
3065  if (clear_past_failure) {
3066  switch (*on_fail) {
3067  case action_fail_stop:
3068  case action_fail_fence:
3069  case action_fail_migrate:
3070  case action_fail_standby:
3071  pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
3072  rsc->id, fail2text(*on_fail));
3073  break;
3074 
3075  case action_fail_block:
3076  case action_fail_ignore:
3077  case action_fail_recover:
3079  *on_fail = action_fail_ignore;
3080  rsc->next_role = RSC_ROLE_UNKNOWN;
3081  break;
3083  if (rsc->remote_reconnect_ms == 0) {
3084  /* With no reconnect interval, the connection is allowed to
3085  * start again after the remote node is fenced and
3086  * completely stopped. (With a reconnect interval, we wait
3087  * for the failure to be cleared entirely before attempting
3088  * to reconnect.)
3089  */
3090  *on_fail = action_fail_ignore;
3091  rsc->next_role = RSC_ROLE_UNKNOWN;
3092  }
3093  break;
3094  }
3095  }
3096 }
3097 
3098 
3099 gboolean
3100 unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
3101  enum action_fail_response * on_fail, pe_working_set_t * data_set)
3102 {
3103  int task_id = 0;
3104 
3105  const char *key = NULL;
3106  const char *task = NULL;
3107  const char *task_key = NULL;
3108 
3109  int rc = 0;
3110  int status = PCMK_LRM_OP_UNKNOWN;
3111  int target_rc = get_target_rc(xml_op);
3112  guint interval_ms = 0;
3113 
3114  gboolean expired = FALSE;
3115  resource_t *parent = rsc;
3116  enum action_fail_response failure_strategy = action_fail_recover;
3117 
3118  CRM_CHECK(rsc != NULL, return FALSE);
3119  CRM_CHECK(node != NULL, return FALSE);
3120  CRM_CHECK(xml_op != NULL, return FALSE);
3121 
3122  task_key = get_op_key(xml_op);
3123 
3124  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3126 
3127  crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
3128  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
3129  crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
3130  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
3131 
3132  CRM_CHECK(task != NULL, return FALSE);
3133  CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
3134  CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
3135 
3136  if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
3138  /* safe to ignore these */
3139  return TRUE;
3140  }
3141 
3142  if (is_not_set(rsc->flags, pe_rsc_unique)) {
3143  parent = uber_parent(rsc);
3144  }
3145 
3146  pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
3147  task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
3148 
3149  if (node->details->unclean) {
3150  pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
3151  " Further action depends on the value of the stop's on-fail attribute",
3152  node->details->uname, rsc->id);
3153  }
3154 
3155  if(status != PCMK_LRM_OP_NOT_INSTALLED) {
3156  expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
3157  }
3158 
3159  /* Degraded results are informational only, re-map them to their error-free equivalents */
3160  if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3161  rc = PCMK_OCF_OK;
3162 
3163  /* Add them to the failed list to highlight them for the user */
3164  if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
3165  crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
3166  record_failed_op(xml_op, node, rsc, data_set);
3167  }
3168 
3169  } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3171 
3172  /* Add them to the failed list to highlight them for the user */
3173  if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
3175  record_failed_op(xml_op, node, rsc, data_set);
3176  }
3177  }
3178 
3179  if (expired && target_rc != rc) {
3180  const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
3181 
3182  pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
3183  key, node->details->uname,
3184  services_ocf_exitcode_str(rc), rc,
3185  services_ocf_exitcode_str(target_rc), target_rc);
3186 
3187  if (interval_ms == 0) {
3188  crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
3189  task_key, rc, magic, node->details->uname);
3190  goto done;
3191 
3192  } else if(node->details->online && node->details->unclean == FALSE) {
3193  crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
3194  task_key, rc, magic, node->details->uname);
3195  /* This is SO horrible, but we don't have access to CancelXmlOp() yet */
3196  crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
3197  goto done;
3198  }
3199  }
3200 
3201  if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
3202  status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
3203  }
3204 
3205  pe_rsc_trace(rsc, "Handling status: %d", status);
3206  switch (status) {
3207  case PCMK_LRM_OP_CANCELLED:
3208  /* do nothing?? */
3209  pe_err("Don't know what to do for cancelled ops yet");
3210  break;
3211 
3212  case PCMK_LRM_OP_PENDING:
3213  if (safe_str_eq(task, CRMD_ACTION_START)) {
3215  set_active(rsc);
3216 
3217  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3218  rsc->role = RSC_ROLE_MASTER;
3219 
3220  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
3221  /* If a pending migrate_to action is out on a unclean node,
3222  * we have to force the stop action on the target. */
3223  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3224  node_t *target = pe_find_node(data_set->nodes, migrate_target);
3225  if (target) {
3226  stop_action(rsc, target, FALSE);
3227  }
3228  }
3229 
3230  if (rsc->pending_task == NULL) {
3231  if (safe_str_eq(task, CRMD_ACTION_STATUS) && (interval_ms == 0)) {
3232  /* Pending probes are not printed, even if pending
3233  * operations are requested. If someone ever requests that
3234  * behavior, uncomment this and the corresponding part of
3235  * native.c:native_pending_task().
3236  */
3237  /*rsc->pending_task = strdup("probe");*/
3238  /*rsc->pending_node = node;*/
3239  } else {
3240  rsc->pending_task = strdup(task);
3241  rsc->pending_node = node;
3242  }
3243  }
3244  break;
3245 
3246  case PCMK_LRM_OP_DONE:
3247  pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
3248  update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
3249  break;
3250 
3252  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3253  if (failure_strategy == action_fail_ignore) {
3254  crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
3255  "Resource agent doesn't exist",
3256  task_key, status, rc, node->details->uname);
3257  /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
3258  *on_fail = action_fail_migrate;
3259  }
3260  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3261  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3262  break;
3263 
3264  case PCMK_LRM_OP_ERROR:
3267  case PCMK_LRM_OP_TIMEOUT:
3269 
3270  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3271  if ((failure_strategy == action_fail_ignore)
3272  || (failure_strategy == action_fail_restart_container
3273  && safe_str_eq(task, CRMD_ACTION_STOP))) {
3274 
3275  crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
3276  task_key, rc, node->details->uname);
3277 
3278  update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
3279  crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
3281 
3282  record_failed_op(xml_op, node, rsc, data_set);
3283 
3284  if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
3285  *on_fail = failure_strategy;
3286  }
3287 
3288  } else {
3289  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3290 
3291  if(status == PCMK_LRM_OP_ERROR_HARD) {
3292  do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
3293  "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
3294  parent->id, node->details->uname,
3295  task, services_ocf_exitcode_str(rc), rc);
3296 
3297  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3298 
3299  } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
3300  crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
3301  parent->id, task, services_ocf_exitcode_str(rc), rc);
3302 
3303  resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
3304  }
3305  }
3306  break;
3307  }
3308 
3309  done:
3310  pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role));
3311  return TRUE;
3312 }
3313 
3314 gboolean
3315 add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
3316 {
3317  const char *cluster_name = NULL;
3318 
3319  g_hash_table_insert(node->details->attrs,
3320  strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
3321 
3322  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
3323  strdup(node->details->id));
3324  if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
3325  data_set->dc_node = node;
3326  node->details->is_dc = TRUE;
3327  g_hash_table_insert(node->details->attrs,
3328  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
3329  } else {
3330  g_hash_table_insert(node->details->attrs,
3331  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
3332  }
3333 
3334  cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
3335  if (cluster_name) {
3336  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
3337  strdup(cluster_name));
3338  }
3339 
3340  unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
3341  node->details->attrs, NULL, overwrite, data_set->now);
3342 
3343  if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
3344  const char *site_name = pe_node_attribute_raw(node, "site-name");
3345 
3346  if (site_name) {
3347  g_hash_table_insert(node->details->attrs,
3348  strdup(CRM_ATTR_SITE_NAME),
3349  strdup(site_name));
3350 
3351  } else if (cluster_name) {
3352  /* Default to cluster-name if unset */
3353  g_hash_table_insert(node->details->attrs,
3354  strdup(CRM_ATTR_SITE_NAME),
3355  strdup(cluster_name));
3356  }
3357  }
3358  return TRUE;
3359 }
3360 
3361 static GListPtr
3362 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
3363 {
3364  int counter = -1;
3365  int stop_index = -1;
3366  int start_index = -1;
3367 
3368  xmlNode *rsc_op = NULL;
3369 
3370  GListPtr gIter = NULL;
3371  GListPtr op_list = NULL;
3372  GListPtr sorted_op_list = NULL;
3373 
3374  /* extract operations */
3375  op_list = NULL;
3376  sorted_op_list = NULL;
3377 
3378  for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
3379  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
3380  crm_xml_add(rsc_op, "resource", rsc);
3381  crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
3382  op_list = g_list_prepend(op_list, rsc_op);
3383  }
3384  }
3385 
3386  if (op_list == NULL) {
3387  /* if there are no operations, there is nothing to do */
3388  return NULL;
3389  }
3390 
3391  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
3392 
3393  /* create active recurring operations as optional */
3394  if (active_filter == FALSE) {
3395  return sorted_op_list;
3396  }
3397 
3398  op_list = NULL;
3399 
3400  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
3401 
3402  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
3403  xmlNode *rsc_op = (xmlNode *) gIter->data;
3404 
3405  counter++;
3406 
3407  if (start_index < stop_index) {
3408  crm_trace("Skipping %s: not active", ID(rsc_entry));
3409  break;
3410 
3411  } else if (counter < start_index) {
3412  crm_trace("Skipping %s: old", ID(rsc_op));
3413  continue;
3414  }
3415  op_list = g_list_append(op_list, rsc_op);
3416  }
3417 
3418  g_list_free(sorted_op_list);
3419  return op_list;
3420 }
3421 
3422 GListPtr
3423 find_operations(const char *rsc, const char *node, gboolean active_filter,
3424  pe_working_set_t * data_set)
3425 {
3426  GListPtr output = NULL;
3427  GListPtr intermediate = NULL;
3428 
3429  xmlNode *tmp = NULL;
3430  xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
3431 
3432  node_t *this_node = NULL;
3433 
3434  xmlNode *node_state = NULL;
3435 
3436  for (node_state = __xml_first_child(status); node_state != NULL;
3437  node_state = __xml_next_element(node_state)) {
3438 
3439  if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
3440  const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
3441 
3442  if (node != NULL && safe_str_neq(uname, node)) {
3443  continue;
3444  }
3445 
3446  this_node = pe_find_node(data_set->nodes, uname);
3447  if(this_node == NULL) {
3448  CRM_LOG_ASSERT(this_node != NULL);
3449  continue;
3450 
3451  } else if (is_remote_node(this_node)) {
3452  determine_remote_online_status(data_set, this_node);
3453 
3454  } else {
3455  determine_online_status(node_state, this_node, data_set);
3456  }
3457 
3458  if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
3459  /* offline nodes run no resources...
3460  * unless stonith is enabled in which case we need to
3461  * make sure rsc start events happen after the stonith
3462  */
3463  xmlNode *lrm_rsc = NULL;
3464 
3465  tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
3466  tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
3467 
3468  for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL;
3469  lrm_rsc = __xml_next_element(lrm_rsc)) {
3470  if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
3471 
3472  const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
3473 
3474  if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
3475  continue;
3476  }
3477 
3478  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
3479  output = g_list_concat(output, intermediate);
3480  }
3481  }
3482  }
3483  }
3484  }
3485 
3486  return output;
3487 }
GHashTable * tags
Definition: status.h:131
Services API.
#define LOG_TRACE
Definition: logging.h:35
gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set)
Definition: unpack.c:167
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:165
char uname[MAX_NAME]
Definition: internal.h:85
GListPtr nodes
Definition: status.h:108
#define XML_RSC_OP_LAST_CHANGE
Definition: msg_xml.h:278
gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed, pe_working_set_t *data_set)
Definition: unpack.c:3100
void verify_pe_options(GHashTable *options)
Definition: common.c:175
#define STATUS_PATH_MAX
Definition: unpack.c:2339
xmlNode * find_xml_node(xmlNode *cib, const char *node_path, gboolean must_find)
Definition: xml.c:1676
enum pe_quorum_policy no_quorum_policy
Definition: status.h:100
A dumping ground.
#define crm_notice(fmt, args...)
Definition: logging.h:251
GHashTable * known_on
Definition: status.h:305
#define CRMD_ACTION_MIGRATED
Definition: crm.h:145
xmlNode * failed
Definition: status.h:116
GHashTable * attrs
Definition: status.h:179
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:15
gboolean unseen
Definition: status.h:163
#define pe_flag_have_stonith_resource
Definition: status.h:67
node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t *data_set)
Definition: unpack.c:331
gboolean fixed
Definition: status.h:186
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:141
#define INFINITY
Definition: crm.h:71
gint sort_rsc_priority(gconstpointer a, gconstpointer b)
Definition: utils.c:412
gboolean determine_online_status(xmlNode *node_state, node_t *this_node, pe_working_set_t *data_set)
Definition: unpack.c:1379
#define CRM_ATTR_KIND
Definition: crm.h:88
gboolean get_target_role(resource_t *rsc, enum rsc_role_e *role)
Definition: utils.c:1709
GListPtr dangling_migrations
Definition: status.h:316
#define XML_NODE_IS_FENCED
Definition: msg_xml.h:241
#define XML_ATTR_TRANSITION_MAGIC
Definition: msg_xml.h:354
GHashTable * state
Definition: status.h:391
node_t * node_copy(const node_t *this_node)
Definition: utils.c:116
#define CRM_ATTR_IS_DC
Definition: crm.h:90
#define stop_action(rsc, node, optional)
Definition: internal.h:206
void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set)
Definition: remote.c:203
#define pe_flag_enable_unfencing
Definition: status.h:68
#define pe_rsc_orphan_container_filler
Definition: status.h:195
pe_resource_t * container
Definition: status.h:318
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1499
pe_node_t * partial_migration_source
Definition: status.h:303
#define XML_ATTR_QUORUM_PANIC
Definition: msg_xml.h:82
#define XML_ATTR_TYPE
Definition: msg_xml.h:97
bool pe_can_fence(pe_working_set_t *data_set, node_t *node)
Definition: utils.c:89
enum rsc_role_e role
Definition: status.h:308
#define XML_TAG_UTILIZATION
Definition: msg_xml.h:169
#define pe_flag_have_remote_nodes
Definition: status.h:81
bool container_fix_remote_addr(resource_t *rsc)
Definition: container.c:910
#define XML_RULE_ATTR_SCORE
Definition: msg_xml.h:294
#define XML_BOOLEAN_FALSE
Definition: msg_xml.h:106
#define crm_config_err(fmt...)
Definition: crm_internal.h:177
int get_target_rc(xmlNode *xml_op)
Definition: unpack.c:2979
gboolean standby
Definition: status.h:390
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition: remote.c:140
enum rsc_role_e next_role
Definition: status.h:309
action_t * pe_fence_op(node_t *node, const char *op, bool optional, const char *reason, pe_working_set_t *data_set)
Definition: utils.c:2161
enum action_fail_response on_fail
Definition: status.h:351
#define pe_rsc_orphan
Definition: status.h:192
int char2score(const char *score)
Definition: utils.c:197
#define pe_proc_warn(fmt...)
Definition: internal.h:21
pe_resource_t * remote_rsc
Definition: status.h:175
#define XML_TAG_TRANSIENT_NODEATTRS
Definition: msg_xml.h:359
#define CRMD_ACTION_NOTIFY
Definition: crm.h:158
#define pe_flag_startup_probes
Definition: status.h:79
long long crm_get_msec(const char *input)
Definition: utils.c:565
GHashTable * meta
Definition: status.h:311
GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node)
Definition: utils.c:1426
gboolean common_unpack(xmlNode *xml_obj, resource_t **rsc, resource_t *parent, pe_working_set_t *data_set)
Definition: complex.c:358
resource_object_functions_t * fns
Definition: status.h:270
#define XML_CIB_TAG_TAG
Definition: msg_xml.h:386
#define XML_LRM_TAG_RESOURCE
Definition: msg_xml.h:225
#define pe_flag_stop_rsc_orphans
Definition: status.h:71
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:212
#define CRMD_ACTION_PROMOTE
Definition: crm.h:153
int crm_parse_int(const char *text, const char *default_text)
Parse an integer value from a string.
Definition: strings.c:107
GListPtr fillers
Definition: status.h:319
gboolean pending
Definition: status.h:161
GListPtr resources
Definition: status.h:109
#define XML_NVPAIR_ATTR_NAME
Definition: msg_xml.h:337
#define XML_NODE_IS_MAINTENANCE
Definition: msg_xml.h:242
char * id
Definition: status.h:395
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: utils.c:1579
#define XML_NODE_EXPECTED
Definition: msg_xml.h:237
#define XML_CIB_TAG_RSC_TEMPLATE
Definition: msg_xml.h:178
AIS_Host host
Definition: internal.h:84
time_t get_effective_time(pe_working_set_t *data_set)
Definition: utils.c:1694
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:151
pe_node_t * pe_find_node(GListPtr node_list, const char *uname)
Definition: status.c:412
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:181
xmlNode * params_restart
Definition: internal.h:305
#define clear_bit(word, bit)
Definition: crm_internal.h:166
void copy_in_properties(xmlNode *target, xmlNode *src)
Definition: xml.c:1748
#define CRMD_JOINSTATE_NACK
Definition: crm.h:138
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:223
#define CRM_ATTR_CLUSTER_NAME
Definition: crm.h:91
pe_node_t * partial_migration_target
Definition: status.h:302
GHashTable * tickets
Definition: status.h:103
#define pe_rsc_allow_migrate
Definition: status.h:215
gboolean remote_was_fenced
Definition: status.h:170
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition: nvpair.c:395
char * pending_task
Definition: status.h:284
#define XPATH_ENABLE_UNFENCING
Definition: unpack.c:145
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition: xpath.c:220
#define pe_proc_err(fmt...)
Definition: internal.h:20
gboolean remote_requires_reset
Definition: status.h:169
action_fail_response
Definition: common.h:34
char * strndup(const char *str, size_t len)
char * dc_uuid
Definition: status.h:92
int stonith_timeout
Definition: status.h:99
#define XML_CIB_TAG_PROPSET
Definition: msg_xml.h:160
gboolean decode_transition_key(const char *key, char **uuid, int *action, int *transition_id, int *target_rc)
Definition: operations.c:177
resource_t * find_container_child(const resource_t *bundle, const node_t *node)
Definition: container.c:1351
#define XML_LRM_ATTR_RSCID
Definition: msg_xml.h:267
gboolean remote_maintenance
Definition: status.h:171
gboolean unpack_resources(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:711
#define CRMD_ACTION_START
Definition: crm.h:147
uint32_t id
Definition: internal.h:80
gboolean is_dc
Definition: status.h:166
#define XML_LRM_ATTR_TASK_KEY
Definition: msg_xml.h:259
#define pe_rsc_block
Definition: status.h:194
#define XML_TAG_ATTR_SETS
Definition: msg_xml.h:161
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:258
const char * pe_base_name_end(const char *id)
Definition: unpack.c:1459
const char * role2text(enum rsc_role_e role)
Definition: common.c:329
gboolean is_remote_node(node_t *node)
Definition: remote.c:52
pe_node_t *(* location)(const pe_resource_t *, GList **, int)
Definition: complex.h:37
#define CRMD_ACTION_STOP
Definition: crm.h:150
int weight
Definition: status.h:185
gboolean unpack_status(xmlNode *status, pe_working_set_t *data_set)
Definition: unpack.c:1050
#define CRMD_JOINSTATE_DOWN
Definition: crm.h:135
#define crm_warn(fmt, args...)
Definition: logging.h:250
guint remote_reconnect_ms
Definition: status.h:283
#define CRMD_ACTION_DEMOTE
Definition: crm.h:155
#define set_bit(word, bit)
Definition: crm_internal.h:165
#define crm_atoi(text, default_text)
Definition: util.h:91
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:420
#define crm_debug(fmt, args...)
Definition: logging.h:254
void native_add_running(resource_t *rsc, node_t *node, pe_working_set_t *data_set)
Definition: native.c:37
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:744
#define XML_CIB_ATTR_SHUTDOWN
Definition: msg_xml.h:244
#define XML_RSC_ATTR_CONTAINER
Definition: msg_xml.h:203
Utility functions.
match only clone instances
Definition: status.h:56
#define XML_ATTR_ID
Definition: msg_xml.h:94
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:360
#define XML_CIB_TAG_RESOURCE
Definition: msg_xml.h:172
gboolean unpack_nodes(xmlNode *xml_nodes, pe_working_set_t *data_set)
Definition: unpack.c:499
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:105
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:156
#define pe_rsc_failed
Definition: status.h:209
gboolean unpacked
Definition: status.h:172
char * digest_all_calc
Definition: internal.h:306
int failure_timeout
Definition: status.h:281
#define stop_key(rsc)
Definition: internal.h:205
#define pe_flag_startup_fencing
Definition: status.h:77
#define CRM_ATTR_UNAME
Definition: crm.h:86
#define set_config_flag(data_set, option, flag)
Definition: unpack.c:25
#define XML_NODE_IS_PEER
Definition: msg_xml.h:239
GListPtr refs
Definition: status.h:396
#define crm_trace(fmt, args...)
Definition: logging.h:255
#define CRMD_JOINSTATE_MEMBER
Definition: crm.h:137
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:130
enum rsc_digest_cmp_val rc
Definition: internal.h:302
gboolean is_baremetal_remote_node(node_t *node)
Definition: remote.c:34
#define pe_rsc_is_container
Definition: status.h:219
char * digest_secure_calc
Definition: internal.h:307
gboolean unpack_remote_nodes(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:585
gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set)
Definition: unpack.c:3315
gboolean is_container_remote_node(node_t *node)
Definition: remote.c:43
xmlNode * add_node_copy(xmlNode *new_parent, xmlNode *xml_node)
Definition: xml.c:1866
const char * stonith_action
Definition: status.h:94
struct pe_node_shared_s * details
Definition: status.h:188
GListPtr running_on
Definition: status.h:304
#define crm_log_xml_debug(xml, text)
Definition: logging.h:262
unsigned long long flags
Definition: status.h:286
const char * uname
Definition: status.h:154
#define XML_TAG_META_SETS
Definition: msg_xml.h:162
Wrappers for and extensions to libxml2.
GHashTable * config_hash
Definition: status.h:102
#define XML_ATTR_UNAME
Definition: msg_xml.h:116
char * clone_name
Definition: status.h:260
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition: utils.c:2276
#define XML_RSC_ATTR_MANAGED
Definition: msg_xml.h:193
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:1888
char * clone_zero(const char *last_rsc_id)
Definition: unpack.c:1521
action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Definition: utils.c:439
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:462
#define pe_flag_maintenance_mode
Definition: status.h:64
time_t last_granted
Definition: status.h:389
pe_resource_t * pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: clone.c:58
void(* free)(pe_resource_t *)
Definition: complex.h:38
#define XML_LRM_ATTR_MIGRATE_TARGET
Definition: msg_xml.h:284
#define CIB_OPTIONS_FIRST
Definition: msg_xml.h:47
gboolean standby
Definition: status.h:159
#define XML_RSC_ATTR_REMOTE_NODE
Definition: msg_xml.h:206
char * uuid
Definition: status.h:345
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:274
gboolean expected_up
Definition: status.h:165
void free_xml(xmlNode *child)
Definition: xml.c:2012
#define pe_flag_stop_everything
Definition: status.h:73
enum pe_obj_types variant
Definition: status.h:268
xmlNode * input
Definition: status.h:88
gboolean granted
Definition: status.h:388
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:204
#define XML_CIB_TAG_NODE
Definition: msg_xml.h:157
const char * placement_strategy
Definition: status.h:95
xmlNode * params_all
Definition: internal.h:303
pe_resource_t * pe_find_resource(GListPtr rsc_list, const char *id_rh)
Definition: status.c:360
GListPtr actions
Definition: status.h:297
const char * id
Definition: status.h:153
char * id
Definition: status.h:387
#define crm_config_warn(fmt...)
Definition: crm_internal.h:178
#define XML_ATTR_TRANSITION_KEY
Definition: msg_xml.h:355
gboolean rsc_discovery_enabled
Definition: status.h:168
#define CRM_XS
Definition: logging.h:43
GListPtr running_rsc
Definition: status.h:176
pe_node_t * dc_node
Definition: status.h:93
#define pe_rsc_unique
Definition: status.h:198
GHashTable * node_hash_from_list(GListPtr list)
Definition: utils.c:172
const char * localhost
Definition: status.h:130
guint crm_parse_ms(const char *text)
Definition: strings.c:127
gboolean xml_contains_remote_node(xmlNode *xml)
Definition: remote.c:81
gboolean is_remote_node
Definition: status.h:289
pe_node_t * pending_node
Definition: status.h:321
const char * fail2text(enum action_fail_response fail)
Definition: common.c:187
GListPtr children
Definition: status.h:315
#define pe_flag_quick_location
Definition: status.h:83
#define pe_rsc_start_pending
Definition: status.h:211
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:224
#define crm_err(fmt, args...)
Definition: logging.h:249
#define XML_CIB_TAG_TICKET_STATE
Definition: msg_xml.h:383
void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1535
#define CRM_ASSERT(expr)
Definition: results.h:20
xmlXPathObjectPtr xpath_search(xmlNode *xml_top, const char *path)
Definition: xpath.c:145
pe_action_t * pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:360
void pe_fence_node(pe_working_set_t *data_set, node_t *node, const char *reason)
Schedule a fence action for a node.
Definition: unpack.c:68
ticket_t * ticket_new(const char *ticket_id, pe_working_set_t *data_set)
Definition: utils.c:1826
bool remote_id_conflict(const char *remote_name, pe_working_set_t *data)
Definition: unpack.c:392
#define pe_rsc_promotable
Definition: status.h:200
#define XML_ATTR_HAVE_WATCHDOG
Definition: msg_xml.h:84
#define XML_NODE_ATTR_RSC_DISCOVERY
Definition: msg_xml.h:340
#define pe_flag_remove_after_stop
Definition: status.h:76
#define CRMD_ACTION_METADATA
Definition: crm.h:162
#define XML_LRM_ATTR_INTERVAL_MS
Definition: msg_xml.h:256
#define pe_rsc_failure_ignored
Definition: status.h:217
xmlNode * params_secure
Definition: internal.h:304
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:270
#define pe_rsc_managed
Definition: status.h:193
#define CRMD_ACTION_MIGRATE
Definition: crm.h:144
#define XML_NVPAIR_ATTR_VALUE
Definition: msg_xml.h:338
int node_score_red
Definition: utils.c:61
#define crm_str_hash
Definition: util.h:55
GHashTable * utilization
Definition: status.h:180
#define uint32_t
Definition: stdint.in.h:158
enum rsc_role_e fail_role
Definition: status.h:352
gboolean shutdown
Definition: status.h:164
char data[0]
Definition: internal.h:90
#define crm_str(x)
Definition: logging.h:275
#define XML_LRM_ATTR_OPSTATUS
Definition: msg_xml.h:268
gboolean unpack_lrm_resources(node_t *node, xmlNode *lrm_rsc_list, pe_working_set_t *data_set)
Definition: unpack.c:2286
int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
Definition: failcounts.c:251
#define CRMD_JOINSTATE_PENDING
Definition: crm.h:136
uint32_t pe_wo
Definition: unpack.c:41
rsc_role_e
Definition: common.h:86
enum pe_action_flags flags
Definition: status.h:349
gboolean maintenance
Definition: status.h:167
#define XML_LRM_ATTR_RC
Definition: msg_xml.h:269
GHashTable * digest_cache
cache of calculated resource digests
Definition: status.h:181
bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: clone.c:633
GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set)
Definition: unpack.c:3423
#define XML_NODE_JOIN_STATE
Definition: msg_xml.h:236
void pe_free_action(action_t *action)
Definition: utils.c:1311
#define pe_flag_have_quorum
Definition: status.h:62
void destroy_ticket(gpointer data)
Definition: utils.c:1814
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:137
#define XML_CIB_TAG_OBJ_REF
Definition: msg_xml.h:387
void unpack_instance_attributes(xmlNode *top, xmlNode *xml_obj, const char *set_name, GHashTable *node_hash, GHashTable *hash, const char *always_first, gboolean overwrite, crm_time_t *now)
Definition: rules.c:886
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:238
#define pe_flag_stop_action_orphans
Definition: status.h:72
gboolean crm_is_true(const char *s)
Definition: strings.c:156
#define CRM_ATTR_SITE_NAME
Definition: crm.h:92
void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2103
#define XML_CIB_TAG_GROUP
Definition: msg_xml.h:173
CRM_TRACE_INIT_DATA(pe_status)
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:226
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:16
#define pe_flag_symmetric_cluster
Definition: status.h:63
#define ID(x)
Definition: msg_xml.h:412
unsigned long long flags
Definition: status.h:97
#define pe_err(fmt...)
Definition: internal.h:18
void print_resource(int log_level, const char *pre_text, resource_t *rsc, gboolean details)
Definition: utils.c:1295
gboolean unpack_tags(xmlNode *xml_tags, pe_working_set_t *data_set)
Definition: unpack.c:771
#define pe_rsc_needs_fencing
Definition: status.h:222
#define safe_str_eq(a, b)
Definition: util.h:54
int node_score_green
Definition: utils.c:62
#define ONLINESTATUS
Definition: util.h:34
gboolean order_actions(action_t *lh_action, action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1745
op_digest_cache_t * rsc_action_digest_cmp(resource_t *rsc, xmlNode *xml_op, node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2001
gboolean standby_onfail
Definition: status.h:160
#define XML_LRM_ATTR_MIGRATE_SOURCE
Definition: msg_xml.h:283
pe_resource_t *(* find_rsc)(pe_resource_t *parent, const char *search, const pe_node_t *node, int flags)
Definition: complex.h:29
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition: xpath.c:45
#define CRM_ATTR_ID
Definition: crm.h:87
gint sort_node_uname(gconstpointer a, gconstpointer b)
Definition: utils.c:215
gboolean unclean
Definition: status.h:162
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
GList * GListPtr
Definition: crm.h:190
enum node_type type
Definition: status.h:155
int node_score_yellow
Definition: utils.c:63
#define XML_CIB_TAG_TICKETS
Definition: msg_xml.h:382
crm_time_t * now
Definition: status.h:89
#define crm_info(fmt, args...)
Definition: logging.h:252
char * digest_restart_calc
Definition: internal.h:308
char * generate_op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key.
Definition: operations.c:37
GHashTable * template_rsc_sets
Definition: status.h:129
#define pe_flag_concurrent_fencing
Definition: status.h:69
pe_node_t * pe_find_node_any(GListPtr node_list, const char *id, const char *uname)
Definition: status.c:384
gboolean online
Definition: status.h:158
#define pe_flag_start_failure_fatal
Definition: status.h:75
#define pe_flag_stonith_enabled
Definition: status.h:66
GList * stop_needed
Definition: status.h:137
pe_resource_t * parent
Definition: status.h:266
enum crm_ais_msg_types type
Definition: internal.h:83
#define pe_warn_once(pe_wo_bit, fmt...)
Definition: unpack.h:97
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:14
char * id
Definition: status.h:259
GHashTable * allowed_nodes
Definition: status.h:306
#define CRMD_ACTION_STATUS
Definition: crm.h:161