Skip to content

Commit

Permalink
Merge pull request ceph#48209 from kamoltat/wip-ksirivad-fix-tracker-…
Browse files Browse the repository at this point in the history
…57570

osd/OSDMap: Check for uneven weights & != 2 buckets post stretch mode

Reviewed-by: Greg Farnum <[email protected]>
  • Loading branch information
yuriw committed Jun 19, 2023
2 parents 1ee89e3 + cfc415c commit 5ae9588
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 1 deletion.
25 changes: 25 additions & 0 deletions doc/rados/operations/health-checks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,31 @@ other performance issue with the OSDs.
The exact size of the snapshot trim queue is reported by the ``snaptrimq_len``
field of ``ceph pg ls -f json-detail``.

Stretch Mode
------------

INCORRECT_NUM_BUCKETS_STRETCH_MODE
__________________________________

Stretch mode currently only supports 2 dividing buckets with OSDs; this warning indicates
that the number of dividing buckets is not equal to 2 after stretch mode has been enabled.
You can expect unpredictable failures and MON assertions until the condition is fixed.

We encourage you to fix this by removing additional dividing buckets or bumping the
number of dividing buckets to 2.

UNEVEN_WEIGHTS_STRETCH_MODE
___________________________

The 2 dividing buckets must have equal weights when stretch mode is enabled.
This warning suggests that the 2 dividing buckets have uneven weights after
stretch mode is enabled. This is not immediately fatal, however, you can expect
Ceph to be confused when trying to process transitions between dividing buckets.

We encourage you to fix this by making the weights even on both dividing buckets.
This can be done by making sure the combined weight of the OSDs on each dividing
bucket is the same.

Miscellaneous
-------------

Expand Down
23 changes: 23 additions & 0 deletions qa/standalone/ceph-helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,29 @@ function test_wait_for_peered() {

#######################################################################

##
# Wait until a given health condition is no longer reported by the
# cluster, polling "ceph health detail" with the backoff schedule
# derived from $TIMEOUT.
#
# @param string to grep for in health detail
# @return 0 as soon as the condition is no longer reported,
#         1 if the condition is still present after the delays
#         are exhausted.
#
function wait_for_health_gone() {
    local condition=$1
    local -a delays
    delays=($(get_timeout_delays $TIMEOUT .1))
    local -i attempt=0

    while ceph health detail | grep "$condition" ; do
        # Out of retries: dump the health detail for the log and fail.
        if (( attempt >= ${#delays[@]} )) ; then
            ceph health detail
            return 1
        fi
        sleep "${delays[attempt]}"
        attempt+=1
    done
}

##
# Wait until the cluster has health condition passed as arg
# again for $TIMEOUT seconds.
Expand Down
1 change: 0 additions & 1 deletion qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,5 @@ EOF
sleep 3

teardown $dir || return 1

}
main mon-stretch-fail-recovery "$@"
145 changes: 145 additions & 0 deletions qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/usr/bin/env bash

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
function run() {
    local dir=$1
    shift

    # Each monitor gets a dedicated loopback port; the port numbers must
    # be unique across the whole qa suite (verify with: git grep '\<71NN\>').
    export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
    export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
    export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
    export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
    export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "

    # Remember the mon-host-free argument string so tests can rebuild
    # CEPH_ARGS as monitors are added one by one.
    export BASE_CEPH_ARGS=$CEPH_ARGS
    CEPH_ARGS+="--mon-host=$CEPH_MON_A"

    # Run the tests named on the command line, or every TEST_* function
    # defined in the environment when no names were given.
    local selected=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local test_fn
    for test_fn in $selected ; do
        setup "$dir" || return 1
        "$test_fn" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
##
# Verify the stretch-mode health warnings added in OSDMap::check_health:
#   - INCORRECT_NUM_BUCKETS_STRETCH_MODE when the number of dividing
#     buckets is not exactly 2,
#   - UNEVEN_WEIGHTS_STRETCH_MODE when the 2 dividing buckets have
#     different CRUSH weights.
# Builds a 5-mon / 3-mgr / 4-osd cluster split across two zones
# (iris, pze) plus an arbiter mon, enters stretch mode, then triggers
# and clears each warning in turn.
#
# NOTE(review): run() also wraps each test in setup/teardown, so the
# explicit setup/teardown here runs a second time — confirm this is
# intentional (the same pattern appears in mon-stretch-fail-recovery.sh).
TEST_stretched_cluster_uneven_weight() {
    local dir=$1
    local OSDS=4
    # Per-OSD CRUSH weight used as the "even" baseline for both zones.
    local weight=0.09000
    setup $dir || return 1

    # Bring up the five monitors one at a time, extending --mon-host and
    # waiting for quorum to grow after each addition.
    run_mon $dir a --public-addr $CEPH_MON_A || return 1
    wait_for_quorum 300 1 || return 1

    run_mon $dir b --public-addr $CEPH_MON_B || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
    wait_for_quorum 300 2 || return 1

    run_mon $dir c --public-addr $CEPH_MON_C || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C"
    wait_for_quorum 300 3 || return 1

    run_mon $dir d --public-addr $CEPH_MON_D || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D"
    wait_for_quorum 300 4 || return 1

    run_mon $dir e --public-addr $CEPH_MON_E || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E"
    wait_for_quorum 300 5 || return 1

    # Stretch mode requires the connectivity election strategy; mon.e
    # will be the arbiter and must never become the leader.
    ceph mon set election_strategy connectivity
    ceph mon add disallowed_leader e

    run_mgr $dir x || return 1
    run_mgr $dir y || return 1
    run_mgr $dir z || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd || return 1
    done

    # Build the CRUSH hierarchy: two zones under the default root, two
    # hosts per zone, one OSD per host.
    for zone in iris pze
    do
      ceph osd crush add-bucket $zone zone
      ceph osd crush move $zone root=default
    done

    ceph osd crush add-bucket node-2 host
    ceph osd crush add-bucket node-3 host
    ceph osd crush add-bucket node-4 host
    ceph osd crush add-bucket node-5 host

    ceph osd crush move node-2 zone=iris
    ceph osd crush move node-3 zone=iris
    ceph osd crush move node-4 zone=pze
    ceph osd crush move node-5 zone=pze

    ceph osd crush move osd.0 host=node-2
    ceph osd crush move osd.1 host=node-3
    ceph osd crush move osd.2 host=node-4
    ceph osd crush move osd.3 host=node-5

    # Pin each mon to its zone/host so stretch mode knows the topology.
    ceph mon set_location a zone=iris host=node-2
    ceph mon set_location b zone=iris host=node-3
    ceph mon set_location c zone=pze host=node-4
    ceph mon set_location d zone=pze host=node-5

    # Remove the auto-created host bucket for the local machine, then
    # append a stretch rule (2 replicas in each zone) to the CRUSH map
    # by decompiling, editing, and recompiling it.
    hostname=$(hostname -s)
    ceph osd crush remove $hostname || return 1
    ceph osd getcrushmap > crushmap || return 1
    crushtool --decompile crushmap > crushmap.txt || return 1
    sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1
    cat >> crushmap_modified.txt << EOF
rule stretch_rule {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take iris
        step chooseleaf firstn 2 type host
        step emit
        step take pze
        step chooseleaf firstn 2 type host
        step emit
}
# end crush map
EOF

    crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1
    ceph osd setcrushmap -i crushmap.bin || return 1
    local stretched_poolname=stretched_rbdpool
    ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1
    ceph osd pool set $stretched_poolname size 4 || return 1

    ceph mon set_location e zone=arbiter host=node-1 || return 1
    ceph mon enable_stretch_mode e stretch_rule zone || return 1 # Enter stretch mode

    # reweight to a more round decimal.
    ceph osd crush reweight osd.0 $weight
    ceph osd crush reweight osd.1 $weight
    ceph osd crush reweight osd.2 $weight
    ceph osd crush reweight osd.3 $weight

    # Firstly, we test for stretch mode buckets != 2: adding a third
    # zone ("sham") must raise the warning; removing it must clear it.
    ceph osd crush add-bucket sham zone || return 1
    ceph osd crush move sham root=default || return 1
    wait_for_health "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1

    ceph osd crush rm sham # clear the health warn
    wait_for_health_gone "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1

    # Next, we test for uneven weights across buckets: lowering one
    # OSD's weight unbalances zone iris vs zone pze.

    ceph osd crush reweight osd.0 0.07000

    wait_for_health "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1

    ceph osd crush reweight osd.0 $weight # clear the health warn

    wait_for_health_gone "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1

    teardown $dir || return 1
}
main mon-stretched-cluster-uneven-weight "$@"
18 changes: 18 additions & 0 deletions src/osd/OSDMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7201,6 +7201,24 @@ void OSDMap::check_health(CephContext *cct,
ss.str(), 0);
}
}
// UNEQUAL_WEIGHT
if (stretch_mode_enabled) {
vector<int> subtrees;
crush->get_subtree_of_type(stretch_mode_bucket, &subtrees);
if (subtrees.size() != 2) {
stringstream ss;
ss << "Stretch mode buckets != 2";
checks->add("INCORRECT_NUM_BUCKETS_STRETCH_MODE", HEALTH_WARN, ss.str(), 0);
return;
}
int weight1 = crush->get_item_weight(subtrees[0]);
int weight2 = crush->get_item_weight(subtrees[1]);
stringstream ss;
if (weight1 != weight2) {
ss << "Stretch mode buckets have different weights!";
checks->add("UNEVEN_WEIGHTS_STRETCH_MODE", HEALTH_WARN, ss.str(), 0);
}
}
}

int OSDMap::parse_osd_id_list(const vector<string>& ls, set<int> *out,
Expand Down

0 comments on commit 5ae9588

Please sign in to comment.