Merge pull request ceph#47138 from kamoltat/wip-ksirivad-fix-test-pool-min-size

qa/tasks/ceph_manager.py: increase test_pool_min_size timeout

Reviewed-by: Neha Ojha <[email protected]>
yuriw committed Aug 3, 2022
2 parents 9b1e601 + ed73288 commit dc218e4
Showing 1 changed file with 31 additions and 8 deletions.
39 changes: 31 additions & 8 deletions qa/tasks/ceph_manager.py
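For context, the thrasher options this change exercises are read from the task configuration that OSDThrasher receives as self.config. The sketch below is illustrative only: the keys min_out, min_live, min_dead, and timeout (and their defaults) are the ones the diff reads, but the standalone config dict and the concrete timeout value are assumptions, not part of this commit.

    # Hypothetical thrashosds-style options, consumed by test_pool_min_size() below.
    config = {
        'timeout': 1200,  # recovery/clean wait budget in seconds (illustrative value)
        'min_out': 1,     # keep at least this many OSDs out
        'min_live': 2,    # keep at least this many OSDs alive
        'min_dead': 1,    # keep at least this many OSDs down
    }

    # test_pool_min_size() reads these with the same defaults shown in the diff:
    minout = int(config.get('min_out', 1))
    minlive = int(config.get('min_live', 2))
    mindead = int(config.get('min_dead', 1))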
@@ -647,6 +647,7 @@ def reweight_osd_or_by_util(self, osd=None):
options['max_change'])

def primary_affinity(self, osd=None):
self.log("primary_affinity")
if osd is None:
osd = random.choice(self.in_osds)
if random.random() >= .5:
@@ -673,6 +674,7 @@ def thrash_pg_upmap(self):
"""
Install or remove random pg_upmap entries in OSDMap
"""
self.log("thrash_pg_upmap")
from random import shuffle
out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
j = json.loads(out)
@@ -681,12 +683,14 @@ def thrash_pg_upmap(self):
if random.random() >= .3:
pgs = self.ceph_manager.get_pg_stats()
if not pgs:
self.log('No pgs; doing nothing')
return
pg = random.choice(pgs)
pgid = str(pg['pgid'])
poolid = int(pgid.split('.')[0])
sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
if len(sizes) == 0:
self.log('No pools; doing nothing')
return
n = sizes[0]
osds = self.in_osds + self.out_osds
@@ -715,6 +719,7 @@ def thrash_pg_upmap_items(self):
"""
Install or remove random pg_upmap_items entries in OSDMap
"""
self.log("thrash_pg_upmap_items")
from random import shuffle
out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
j = json.loads(out)
@@ -723,12 +728,14 @@ def thrash_pg_upmap_items(self):
if random.random() >= .3:
pgs = self.ceph_manager.get_pg_stats()
if not pgs:
self.log('No pgs; doing nothing')
return
pg = random.choice(pgs)
pgid = str(pg['pgid'])
poolid = int(pgid.split('.')[0])
sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
if len(sizes) == 0:
self.log('No pools; doing nothing')
return
n = sizes[0]
osds = self.in_osds + self.out_osds
@@ -884,15 +891,15 @@ def test_pool_min_size(self):
"""
self.log("test_pool_min_size")
self.all_up()
time.sleep(60) # buffer time for recovery to start.
self.ceph_manager.wait_for_recovery(
timeout=self.config.get('timeout')
)

minout = int(self.config.get("min_out", 1))
minlive = int(self.config.get("min_live", 2))
mindead = int(self.config.get("min_dead", 1))
self.log("doing min_size thrashing")
self.ceph_manager.wait_for_clean(timeout=60)
self.ceph_manager.wait_for_clean(timeout=180)
assert self.ceph_manager.is_clean(), \
'not clean before minsize thrashing starts'
while not self.stopping:
Expand Down Expand Up @@ -966,7 +973,7 @@ def test_pool_min_size(self):
# try a few times since there might be a concurrent pool
# creation or deletion
with safe_while(
sleep=5, tries=5,
sleep=25, tries=5,
action='check for active or peered') as proceed:
while proceed():
if self.ceph_manager.all_active_or_peered():
@@ -992,7 +999,7 @@ def inject_pause(self, conf_key, duration, check_after, should_be_down):
Pause injection testing. Check for osd being down when finished.
"""
the_one = random.choice(self.live_osds)
self.log("inject_pause on {osd}".format(osd=the_one))
self.log("inject_pause on osd.{osd}".format(osd=the_one))
self.log(
"Testing {key} pause injection for duration {duration}".format(
key=conf_key,
@@ -1167,6 +1174,7 @@ def test_map_discontinuity(self):
This sequence should cause the revived osd to have to handle
a map gap since the mons would have trimmed
"""
self.log("test_map_discontinuity")
while len(self.in_osds) < (self.minin + 1):
self.in_osd()
self.log("Waiting for recovery")
@@ -1208,8 +1216,9 @@ def choose_action(self):
mindead = int(self.config.get("min_dead", 0))

self.log('choose_action: min_in %d min_out '
'%d min_live %d min_dead %d' %
(minin, minout, minlive, mindead))
'%d min_live %d min_dead %d '
'chance_down %.2f' %
(minin, minout, minlive, mindead, chance_down))
actions = []
if len(self.in_osds) > minin:
actions.append((self.out_osd, 1.0,))
@@ -2683,7 +2692,11 @@ def is_clean(self):
True if all pgs are clean
"""
pgs = self.get_pg_stats()
return self._get_num_active_clean(pgs) == len(pgs)
if self._get_num_active_clean(pgs) == len(pgs):
return True
else:
self.dump_pgs_not_active_clean()
return False

def is_recovered(self):
"""
@@ -2729,6 +2742,12 @@ def dump_pgs_not_active(self):
self.log('PG %s is not active' % pg['pgid'])
self.log(pg)

def dump_pgs_not_active_peered(self, pgs):
for pg in pgs:
if (not pg['state'].count('active')) and (not pg['state'].count('peered')):
self.log('PG %s is not active or peered' % pg['pgid'])
self.log(pg)

def wait_for_clean(self, timeout=1200):
"""
Returns true when all pgs are clean.
@@ -2914,7 +2933,11 @@ def all_active_or_peered(self):
Wrapper to check if all PGs are active or peered
"""
pgs = self.get_pg_stats()
return self._get_num_active(pgs) + self._get_num_peered(pgs) == len(pgs)
if self._get_num_active(pgs) + self._get_num_peered(pgs) == len(pgs):
return True
else:
self.dump_pgs_not_active_peered(pgs)
return False

def wait_till_active(self, timeout=None):
"""
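The key tuning above is the retry budget in test_pool_min_size(): the safe_while sleep interval grows from 5 to 25 seconds while tries stays at 5, and the wait_for_clean timeout grows from 60 to 180 seconds. A rough back-of-the-envelope sketch of the retry budget, assuming teuthology's safe_while simply sleeps `sleep` seconds between up to `tries` checks before giving up; approx_wait_budget is a hypothetical helper, not part of the commit:

    # Illustrative arithmetic only; not part of the commit.
    def approx_wait_budget(sleep, tries):
        """Upper bound on seconds spent sleeping before safe_while gives up."""
        return sleep * tries

    assert approx_wait_budget(5, 5) == 25    # old budget for 'check for active or peered'
    assert approx_wait_budget(25, 5) == 125  # new budget, five times longer

Combined with the new dump_pgs_not_active_peered() helper and the extra logging in is_clean() and all_active_or_peered(), the change gives slow recoveries more time to settle and records which PGs were stuck when they do not.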
