3 scripts/drbd.ocf | 259 ++++++++++++++++++++++---------------------------------
4 2 files changed, 109 insertions(+), 151 deletions(-)
10 # Copyright (c) 2009 LINBIT HA-Solutions GmbH,
11 # Copyright (c) 2009 Florian Haas, Lars Ellenberg
12 +# Copyright (c) 2014 Wind River Systems, Inc. All rights reserved.
14 # Based on the Heartbeat drbd OCF Resource Agent by Lars Marowsky-Bree
15 # (though it turned out to be an almost complete rewrite)
17 @@ -216,20 +218,6 @@ do_drbdadm() {
22 - if [ -x ${HA_SBIN_DIR}/crm_master ]; then
23 - # Use quiet mode (-Q) to quench logging. Actual score updates
24 - # will get logged by attrd anyway
25 - do_cmd ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1
29 -remove_master_score() {
30 - if [ -x ${HA_SBIN_DIR}/crm_master ]; then
31 - do_cmd ${HA_SBIN_DIR}/crm_master -l reboot -D
35 _sh_status_process() {
36 # _volume not present should not happen,
37 # but may help make this agent work even if it talks to drbd 8.3.
38 @@ -242,6 +230,7 @@ _sh_status_process() {
39 DRBD_DSTATE_LOCAL[$_volume]=${_disk:-Unconfigured}
40 DRBD_DSTATE_REMOTE[$_volume]=${_pdsk:-DUnknown}
43 drbd_set_status_variables() {
44 # drbdsetup sh-status prints these values to stdout,
45 # and then prints _sh_status_process.
46 @@ -322,119 +311,9 @@ maybe_outdate_self()
47 ocf_log notice "outdating $DRBD_RESOURCE: according to OCF_RESKEY_CRM_meta_notify_master_uname, '$host' is still master"
48 do_drbdadm outdate $DRBD_RESOURCE
50 - # on some pacemaker versions, -INFINITY may cause resource instance stop/start.
51 - # But in this case that is ok, it may even clear the replication link
53 - set_master_score -INFINITY
58 -drbd_update_master_score() {
60 - # there may be constraint scores from rules on role=Master,
61 - # that in some ways can add to the node attribute based master score we
62 - # specify below. If you think you want to add personal preferences,
63 - # in case the scores given by this RA do not suffice, this is the
64 - # value space you can work with:
65 - # -INFINITY: Do not promote. Really. Won't work anyways.
66 - # Too bad, at least with current (Oktober 2009) Pacemaker,
67 - # negative master scores cause instance stop; restart cycle :(
68 - # missing, zero: Do not promote.
69 - # I think my data is not good enough.
70 - # Though, of course, you may try, and it might even work.
71 - # 5: please, do not promote, unless this is your only option.
72 - # 10: promotion is probably a bad idea, our local data is no good,
73 - # you'd probably run into severe performance problems, and risk
74 - # application crashes or blocking IO in case you lose the
75 - # replication connection.
76 - # 1000: Ok to be promoted, we have good data locally (though we don't
77 - # know about the peer, so possibly it has even better data?).
78 - # You sould use the crm-fence-peer.sh handler or similar
79 - # mechanism to avoid data divergence.
80 - # 10000: Please promote me/keep me Primary.
81 - # I'm confident that my data is as good as it gets.
83 - # For multi volume, we need to compare who is "better" a bit more sophisticated.
84 - # The ${XXX[*]//UpToDate}, without being in double quotes, results in a single space,
85 - # if all are UpToDate.
86 - : == DEBUG == ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ ==
87 - case ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ in
89 - # I am Primary, all local disks are UpToDate
90 - set_master_score 10000
93 - # all local disks are UpToDate,
94 - # but I'm not Primary,
95 - # and I'm not sure about the peer's disk state(s).
96 - # We may need to outdate ourselves?
97 - # But if we outdate in a MONITOR, and are disconnected
98 - # secondary because of a hard primary crash, before CRM noticed
99 - # that there is no more master, we'd make us utterly useless!
100 - # Trust that the primary will also notice the disconnect,
101 - # and will place an appropriate fencing constraint via
102 - # its fence-peer handler callback.
103 - set_master_score 1000
106 - # We know something about our peer, which means that either the
107 - # replication link is established, or it was not even
108 - # consistent last time we talked to each other.
109 - # Also all our local disks are UpToDate, which means even if we are
110 - # currently synchronizing, we do so as SyncSource.
111 - set_master_score 10000
115 - # At least one of our local disks is not up to date.
116 - # But our peer is ALL OK.
117 - # We can expect to have access to useful
118 - # data, but must expect degraded performance.
119 - set_master_score 10
122 - */*Negotiating*/*/)
123 - # some transitional state.
124 - # just don't do anything
130 - */*Inconsistent*/*/|\
132 - # ALWAYS put the cluster in MAINTENANCE MODE
133 - # if you add a volume to a live replication group,
134 - # because the new volume will typically come up as Inconsistent
135 - # the first time, which would cause a monitor to revoke the
138 - # At least some of our local disks are not really useable.
139 - # Our peer is not all good either (or some previous case block
140 - # would have matched). We have no access to useful data.
141 - # DRBD would refuse to be promoted, anyways.
143 - # set_master_score -INFINITY
144 - # Too bad, at least with current (Oktober 2009) Pacemaker,
145 - # negative master scores cause instance stop; restart cycle :(
146 - # Hope that this will suffice.
147 - remove_master_score
150 - # All local disks seem to be Consistent.
151 - # They _may_ be up to date, or not.
152 - # We hope that fencing mechanisms have put constraints in
153 - # place, so we won't be promoted with stale data.
154 - # But in case this was a cluster crash,
155 - # at least allow _someone_ to be promoted.
160 - return $OCF_SUCCESS
166 @@ -488,7 +367,103 @@ drbd_status() {
170 -# I'm sorry, but there is no $OCF_DEGRADED_MASTER or similar yet.
178 + if [ $status -ne $OCF_SUCCESS -a $status -ne $OCF_RUNNING_MASTER ]
183 + drbd_set_status_variables
185 + ocf_log info "${OCF_RESKEY_drbd_resource} ${DRBD_ROLE_LOCAL}/${DRBD_DSTATE_LOCAL}/${DRBD_DSTATE_REMOTE} ${DRBD_CSTATE}"
187 + case "${DRBD_DSTATE_LOCAL}" in
189 + case "${DRBD_CSTATE}" in
191 + rc=$OCF_DATA_STANDALONE
192 + ocf_log info "${OCF_RESKEY_drbd_resource} standalone, attempting to reconnect."
193 + do_drbdadm connect ${OCF_RESKEY_drbd_resource}
195 + StartingSyncT | WFBitMapT | WFSyncUUID | SyncTarget | \
198 + #drbd-overview | grep -A 1 drbd-cgcs | grep sync\'ed | cut -f2,3 -d' '
199 + ocf_log info "${OCF_RESKEY_drbd_resource} syncing"
206 + case "${DRBD_CSTATE}" in
208 + rc=$OCF_DATA_STANDALONE
209 + ocf_log info "${OCF_RESKEY_drbd_resource} standalone, attempting to reconnect"
210 + do_drbdadm connect ${OCF_RESKEY_drbd_resource}
213 + rc=$OCF_DATA_CONSISTENT
214 + ocf_log info "${OCF_RESKEY_drbd_resource} consistent"
219 + rc=$OCF_DATA_OUTDATED
220 + ocf_log info "${OCF_RESKEY_drbd_resource} outdated"
223 + case "${DRBD_CSTATE}" in
225 + rc=$OCF_DATA_STANDALONE
226 + ocf_log info "${OCF_RESKEY_drbd_resource} standalone"
228 + StartingSyncT | WFBitMapT | WFSyncUUID | SyncTarget | \
231 + ocf_log info "${OCF_RESKEY_drbd_resource} sync"
234 + rc=$OCF_DATA_INCONSISTENT
235 + ocf_log info "${OCF_RESKEY_drbd_resource} inconsistent"
241 + if [ $status -eq $OCF_RUNNING_MASTER ]
243 + if [ $rc -eq $OCF_DATA_INCONSISTENT ]
245 + rc=$OCF_RUNNING_MASTER_DATA_INCONSISTENT
247 + elif [ $rc -eq $OCF_DATA_OUTDATED ]
249 + rc=$OCF_RUNNING_MASTER_DATA_OUTDATED
251 + elif [ $rc -eq $OCF_DATA_CONSISTENT ]
253 + rc=$OCF_RUNNING_MASTER_DATA_CONSISTENT
255 + elif [ $rc -eq $OCF_DATA_SYNC ]
257 + rc=$OCF_RUNNING_MASTER_DATA_SYNC
259 + elif [ $rc -eq $OCF_DATA_STANDALONE ]
261 + rc=$OCF_RUNNING_MASTER_DATA_STANDALONE
271 @@ -501,7 +476,8 @@ drbd_monitor() {
275 - drbd_update_master_score
276 + drbd_condition $status
281 @@ -578,7 +554,8 @@ drbd_start() {
282 # "running" already, anyways, right?
283 figure_out_drbd_peer_uname
284 do_drbdadm $DRBD_TO_PEER adjust $DRBD_RESOURCE
286 + drbd_condition $OCF_SUCCESS
291 @@ -606,9 +583,6 @@ drbd_start() {
292 $first_try || sleep 1
295 - # in case someone does not configure monitor,
296 - # we must at least call it once after start.
297 - drbd_update_master_score
301 @@ -642,7 +616,8 @@ drbd_promote() {
306 + drbd_condition $OCF_SUCCESS
310 $first_try || sleep 1
311 @@ -666,7 +641,8 @@ drbd_demote() {
316 + drbd_condition $OCF_SUCCESS
321 @@ -718,14 +694,9 @@ drbd_stop() {
322 # outdate myself in drbd on-disk meta data.
325 - # do not let old master scores laying around.
326 - # they may confuse crm if this node was set to standby.
327 - remove_master_score
334 local n_type=$OCF_RESKEY_CRM_meta_notify_type
335 local n_op=$OCF_RESKEY_CRM_meta_notify_operation
336 @@ -760,7 +731,6 @@ drbd_notify() {
337 # After something has been done is a good time to
338 # recheck our status:
339 drbd_set_status_variables
340 - drbd_update_master_score
342 : == DEBUG == ${DRBD_DSTATE_REMOTE[*]} ==
343 case ${DRBD_DSTATE_REMOTE[*]} in
344 @@ -793,17 +763,6 @@ ls_stat_is_block_maj_147() {
345 [[ $1 = b* ]] && [[ $5 == 147,* ]]
348 -check_crm_feature_set()
350 - set -- ${OCF_RESKEY_crm_feature_set//[!0-9]/ }
351 - local a=${1:-0} b=${2:-0} c=${3:-0}
354 - (( a == 3 && b > 0 )) ||
355 - (( a == 3 && b == 0 && c > 0 )) ||
356 - ocf_log warn "You may be disappointed: This RA is intended for pacemaker 1.0 or better!"
359 drbd_validate_all () {
361 DRBDSETUP="drbdsetup"
362 @@ -821,7 +780,6 @@ drbd_validate_all () {
363 if (( $DRBDADM_VERSION_CODE >= 0x080400 )); then
364 DRBD_HAS_MULTI_VOLUME=true
366 - check_crm_feature_set
368 # Check clone and M/S options.
369 meta_expect clone-max -le 2
370 @@ -890,7 +848,6 @@ drbd_validate_all () {
371 # hm. probably misconfigured constraint somewhere.
372 # sorry. don't retry anywhere.
373 ocf_log err "DRBD resource ${DRBD_RESOURCE} not found in configuration file ${OCF_RESKEY_drbdconf}."
374 - remove_master_score
375 return $OCF_ERR_INSTALLED
381 # description: Loads and unloads the drbd module
383 # Copyright 2001-2010 LINBIT
384 +# Copyright (c) 2014 Wind River Systems, Inc. All rights reserved.
386 # Philipp Reisner, Lars Ellenberg