Skip to content

Commit

Permalink
Merge pull request ceph#5922 from trociny/wip-pool-set-noscrub
Browse files Browse the repository at this point in the history
osd: per pool control for scrub and deep-scrub

Reviewed-by: David Zafman <[email protected]>
  • Loading branch information
dzafman committed Sep 22, 2015
2 parents 44a2609 + 30810da commit 424bcea
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 43 deletions.
48 changes: 48 additions & 0 deletions doc/rados/operations/pools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,27 @@ You may set values for the following keys:
:Version: Version ``FIXME``


``write_fadvise_dontneed``

:Description: Set/Unset WRITE_FADVISE_DONTNEED flag on a given pool.
:Type: Integer
:Valid Range: 1 sets flag, 0 unsets flag


``noscrub``

:Description: Set/Unset NOSCRUB flag on a given pool.
:Type: Integer
:Valid Range: 1 sets flag, 0 unsets flag


``nodeep-scrub``

:Description: Set/Unset NODEEP_SCRUB flag on a given pool.
:Type: Integer
:Valid Range: 1 sets flag, 0 unsets flag


``hit_set_type``

:Description: Enables hit set tracking for cache pools.
Expand Down Expand Up @@ -402,6 +423,33 @@ You may set values for the following keys:
:Example: ``1800`` 30min


``scrub_min_interval``

:Description: The minimum interval in seconds for pool scrubbing when
load is low. If it is 0, the value osd_scrub_min_interval
from config is used.

:Type: Double
:Default: ``0``

``scrub_max_interval``

:Description: The maximum interval in seconds for pool scrubbing
irrespective of cluster load. If it is 0, the value
osd_scrub_max_interval from config is used.

:Type: Double
:Default: ``0``


``deep_scrub_interval``

:Description: The interval in seconds for pool “deep” scrubbing. If it
is 0, the value osd_deep_scrub_interval from config is used.

:Type: Double
:Default: ``0``


Get Pool Values
===============
Expand Down
21 changes: 20 additions & 1 deletion qa/workunits/cephtool/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1355,15 +1355,34 @@ function test_mon_osd_pool_set()
ceph --format=xml osd pool get $TEST_POOL_GETSET auid | grep $auid
ceph osd pool set $TEST_POOL_GETSET auid 0

for flag in hashpspool nodelete nopgchange nosizechange; do
for flag in hashpspool nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do
ceph osd pool set $TEST_POOL_GETSET $flag false
ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: false"
ceph osd pool set $TEST_POOL_GETSET $flag true
ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: true"
ceph osd pool set $TEST_POOL_GETSET $flag 1
ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: true"
ceph osd pool set $TEST_POOL_GETSET $flag 0
ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: false"
expect_false ceph osd pool set $TEST_POOL_GETSET $flag asdf
expect_false ceph osd pool set $TEST_POOL_GETSET $flag 2
done

ceph osd pool set $TEST_POOL_GETSET scrub_min_interval 123456
ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | grep 'scrub_min_interval: 123456'
ceph osd pool set $TEST_POOL_GETSET scrub_min_interval 0
ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | grep 'scrub_min_interval: 0'

ceph osd pool set $TEST_POOL_GETSET scrub_max_interval 123456
ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | grep 'scrub_max_interval: 123456'
ceph osd pool set $TEST_POOL_GETSET scrub_max_interval 0
ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | grep 'scrub_max_interval: 0'

ceph osd pool set $TEST_POOL_GETSET deep_scrub_interval 123456
ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | grep 'deep_scrub_interval: 123456'
ceph osd pool set $TEST_POOL_GETSET deep_scrub_interval 0
ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | grep 'deep_scrub_interval: 0'

ceph osd pool set $TEST_POOL_GETSET nopgchange 1
expect_false ceph osd pool set $TEST_POOL_GETSET pg_num 10
expect_false ceph osd pool set $TEST_POOL_GETSET pgp_num 10
Expand Down
4 changes: 2 additions & 2 deletions src/mon/MonCommands.h
Original file line number Diff line number Diff line change
Expand Up @@ -674,11 +674,11 @@ COMMAND("osd pool rename " \
"rename <srcpool> to <destpool>", "osd", "rw", "cli,rest")
COMMAND("osd pool get " \
"name=pool,type=CephPoolname " \
"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|all|min_write_recency_for_promote|fast_read", \
"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|scrub_min_interval|scrub_max_interval|deep_scrub_interval", \
"get pool parameter <var>", "osd", "r", "cli,rest")
COMMAND("osd pool set " \
"name=pool,type=CephPoolname " \
"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed|min_write_recency_for_promote|fast_read " \
"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|scrub_min_interval|scrub_max_interval|deep_scrub_interval " \
"name=val,type=CephString " \
"name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
"set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
Expand Down
107 changes: 83 additions & 24 deletions src/mon/OSDMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2874,15 +2874,17 @@ void OSDMonitor::dump_info(Formatter *f)
namespace {
enum osd_pool_get_choices {
SIZE, MIN_SIZE, CRASH_REPLAY_INTERVAL,
PG_NUM, PGP_NUM, CRUSH_RULESET, HIT_SET_TYPE,
HIT_SET_PERIOD, HIT_SET_COUNT, HIT_SET_FPP, USE_GMT_HITSET,
AUID, TARGET_MAX_OBJECTS, TARGET_MAX_BYTES,
PG_NUM, PGP_NUM, CRUSH_RULESET, HASHPSPOOL,
NODELETE, NOPGCHANGE, NOSIZECHANGE,
WRITE_FADVISE_DONTNEED, NOSCRUB, NODEEP_SCRUB,
HIT_SET_TYPE, HIT_SET_PERIOD, HIT_SET_COUNT, HIT_SET_FPP,
USE_GMT_HITSET, AUID, TARGET_MAX_OBJECTS, TARGET_MAX_BYTES,
CACHE_TARGET_DIRTY_RATIO, CACHE_TARGET_DIRTY_HIGH_RATIO,
CACHE_TARGET_FULL_RATIO,
CACHE_MIN_FLUSH_AGE, CACHE_MIN_EVICT_AGE,
ERASURE_CODE_PROFILE, MIN_READ_RECENCY_FOR_PROMOTE,
WRITE_FADVISE_DONTNEED, MIN_WRITE_RECENCY_FOR_PROMOTE,
FAST_READ};
MIN_WRITE_RECENCY_FOR_PROMOTE, FAST_READ,
SCRUB_MIN_INTERVAL, SCRUB_MAX_INTERVAL, DEEP_SCRUB_INTERVAL};

std::set<osd_pool_get_choices>
subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
Expand Down Expand Up @@ -3339,6 +3341,10 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
("min_size", MIN_SIZE)
("crash_replay_interval", CRASH_REPLAY_INTERVAL)
("pg_num", PG_NUM)("pgp_num", PGP_NUM)("crush_ruleset", CRUSH_RULESET)
("hashpspool", HASHPSPOOL)("nodelete", NODELETE)
("nopgchange", NOPGCHANGE)("nosizechange", NOSIZECHANGE)
("noscrub", NOSCRUB)("nodeep-scrub", NODEEP_SCRUB)
("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED)
("hit_set_type", HIT_SET_TYPE)("hit_set_period", HIT_SET_PERIOD)
("hit_set_count", HIT_SET_COUNT)("hit_set_fpp", HIT_SET_FPP)
("use_gmt_hitset", USE_GMT_HITSET)
Expand All @@ -3351,9 +3357,10 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
("cache_min_evict_age", CACHE_MIN_EVICT_AGE)
("erasure_code_profile", ERASURE_CODE_PROFILE)
("min_read_recency_for_promote", MIN_READ_RECENCY_FOR_PROMOTE)
("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED)
("min_write_recency_for_promote", MIN_WRITE_RECENCY_FOR_PROMOTE)
("fast_read", FAST_READ);
("fast_read", FAST_READ)("scrub_min_interval", SCRUB_MIN_INTERVAL)
("scrub_max_interval", SCRUB_MAX_INTERVAL)
("deep_scrub_interval", DEEP_SCRUB_INTERVAL);

typedef std::set<osd_pool_get_choices> choices_set_t;

Expand Down Expand Up @@ -3409,6 +3416,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
if (f) {
for(choices_set_t::const_iterator it = selected_choices.begin();
it != selected_choices.end(); ++it) {
choices_map_t::const_iterator i;
f->open_object_section("pool");
f->dump_string("pool", poolstr);
f->dump_int("pool_id", pool);
Expand All @@ -3435,6 +3443,22 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case CRUSH_RULESET:
f->dump_int("crush_ruleset", p->get_crush_ruleset());
break;
case HASHPSPOOL:
case NODELETE:
case NOPGCHANGE:
case NOSIZECHANGE:
case WRITE_FADVISE_DONTNEED:
case NOSCRUB:
case NODEEP_SCRUB:
for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) {
if (i->second == *it)
break;
}
assert(i != ALL_CHOICES.end());
f->dump_string(i->first.c_str(),
p->has_flag(pg_pool_t::get_flag_by_name(i->first)) ?
"true" : "false");
break;
case HIT_SET_PERIOD:
f->dump_int("hit_set_period", p->hit_set_period);
break;
Expand Down Expand Up @@ -3500,18 +3524,22 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
f->dump_int("min_read_recency_for_promote",
p->min_read_recency_for_promote);
break;
case WRITE_FADVISE_DONTNEED:
f->dump_string("write_fadvise_dontneed",
p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
"true" : "false");
break;
case MIN_WRITE_RECENCY_FOR_PROMOTE:
f->dump_int("min_write_recency_for_promote",
p->min_write_recency_for_promote);
break;
case FAST_READ:
f->dump_int("fast_read", p->fast_read);
break;
case SCRUB_MIN_INTERVAL:
f->dump_int("scrub_min_interval", p->scrub_min_interval);
break;
case SCRUB_MAX_INTERVAL:
f->dump_int("scrub_max_interval", p->scrub_max_interval);
break;
case DEEP_SCRUB_INTERVAL:
f->dump_int("deep_scrub_interval", p->deep_scrub_interval);
break;
}
f->close_section();
f->flush(rdata);
Expand All @@ -3520,6 +3548,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
} else /* !f */ {
for(choices_set_t::const_iterator it = selected_choices.begin();
it != selected_choices.end(); ++it) {
choices_map_t::const_iterator i;
switch(*it) {
case PG_NUM:
ss << "pg_num: " << p->get_pg_num() << "\n";
Expand Down Expand Up @@ -3601,9 +3630,20 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
ss << "min_read_recency_for_promote: " <<
p->min_read_recency_for_promote << "\n";
break;
case HASHPSPOOL:
case NODELETE:
case NOPGCHANGE:
case NOSIZECHANGE:
case WRITE_FADVISE_DONTNEED:
ss << "write_fadvise_dontneed: " <<
(p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
case NOSCRUB:
case NODEEP_SCRUB:
for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) {
if (i->second == *it)
break;
}
assert(i != ALL_CHOICES.end());
ss << i->first << ": " <<
(p->has_flag(pg_pool_t::get_flag_by_name(i->first)) ?
"true" : "false") << "\n";
break;
case MIN_WRITE_RECENCY_FOR_PROMOTE:
Expand All @@ -3613,6 +3653,15 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case FAST_READ:
ss << "fast_read: " << p->fast_read << "\n";
break;
case SCRUB_MIN_INTERVAL:
ss << "scrub_min_interval: " << p->scrub_min_interval << "\n";
break;
case SCRUB_MAX_INTERVAL:
ss << "scrub_max_interval: " << p->scrub_max_interval << "\n";
break;
case DEEP_SCRUB_INTERVAL:
ss << "deep_scrub_interval: " << p->deep_scrub_interval << "\n";
break;
}
rdata.append(ss.str());
ss.str("");
Expand Down Expand Up @@ -4849,7 +4898,8 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
}
p.crush_ruleset = n;
} else if (var == "hashpspool" || var == "nodelete" || var == "nopgchange" ||
var == "nosizechange") {
var == "nosizechange" || var == "write_fadvise_dontneed" ||
var == "noscrub" || var == "nodeep-scrub") {
uint64_t flag = pg_pool_t::get_flag_by_name(var);
// make sure we only compare against 'n' if we didn't receive a string
if (val == "true" || (interr.empty() && n == 1)) {
Expand Down Expand Up @@ -4979,15 +5029,6 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
return -EINVAL;
}
p.min_read_recency_for_promote = n;
} else if (var == "write_fadvise_dontneed") {
if (val == "true" || (interr.empty() && n == 1)) {
p.flags |= pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED;
} else if (val == "false" || (interr.empty() && n == 0)) {
p.flags &= ~pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED;
} else {
ss << "expecting value 'true', 'false', '0', or '1'";
return -EINVAL;
}
} else if (var == "min_write_recency_for_promote") {
if (interr.length()) {
ss << "error parsing integer value '" << val << "': " << interr;
Expand All @@ -5004,6 +5045,24 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
} else if (val == "false" || (interr.empty() && n == 0)) {
p.fast_read = false;
}
} else if (var == "scrub_min_interval") {
if (floaterr.length()) {
ss << "error parsing floating point value '" << val << "': " << floaterr;
return -EINVAL;
}
p.scrub_min_interval = f;
} else if (var == "scrub_max_interval") {
if (floaterr.length()) {
ss << "error parsing floating point value '" << val << "': " << floaterr;
return -EINVAL;
}
p.scrub_max_interval = f;
} else if (var == "deep_scrub_interval") {
if (floaterr.length()) {
ss << "error parsing floating point value '" << val << "': " << floaterr;
return -EINVAL;
}
p.deep_scrub_interval = f;
} else {
ss << "unrecognized variable '" << var << "'";
return -EINVAL;
Expand Down
10 changes: 7 additions & 3 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5930,19 +5930,23 @@ bool OSD::scrub_random_backoff()
return false;
}

/**
 * Construct a scrub job for @p pg.
 *
 * sched_time and deadline both start at @p timestamp. When the scrub was not
 * explicitly requested (@p must is false) they are pushed into the future:
 * sched_time by the effective minimum interval plus an optional random
 * fraction of it, deadline by the effective maximum interval.
 *
 * @param pg                       placement group the job refers to
 * @param timestamp                base time for sched_time and deadline
 * @param pool_scrub_min_interval  per-pool minimum interval; values <= 0 fall
 *                                 back to g_conf->osd_scrub_min_interval
 * @param pool_scrub_max_interval  per-pool maximum interval; values <= 0 fall
 *                                 back to g_conf->osd_scrub_max_interval
 * @param must                     true if the scrub was explicitly requested;
 *                                 no delay is applied in that case
 */
OSDService::ScrubJob::ScrubJob(const spg_t& pg, const utime_t& timestamp,
                               double pool_scrub_min_interval,
                               double pool_scrub_max_interval, bool must)
  : pgid(pg),
    sched_time(timestamp),
    deadline(timestamp)
{
  // if not explicitly requested, postpone the scrub with a random delay
  if (!must) {
    // Resolve the effective intervals once so the base delay and the random
    // delay are derived from the same (possibly per-pool) value.
    const double scrub_min_interval = pool_scrub_min_interval > 0 ?
      pool_scrub_min_interval : g_conf->osd_scrub_min_interval;
    const double scrub_max_interval = pool_scrub_max_interval > 0 ?
      pool_scrub_max_interval : g_conf->osd_scrub_max_interval;

    sched_time += scrub_min_interval;
    if (g_conf->osd_scrub_interval_randomize_ratio > 0) {
      // Scale a uniform value in [0, 1] instead of using an integer modulo:
      // `rand() % (int)(interval * ratio)` divides by zero whenever the
      // product truncates to 0.
      const double r = rand() / static_cast<double>(RAND_MAX);
      sched_time += scrub_min_interval *
        g_conf->osd_scrub_interval_randomize_ratio * r;
    }
    deadline += scrub_max_interval;
  }
}

Expand Down
10 changes: 7 additions & 3 deletions src/osd/OSD.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,15 +589,19 @@ class OSDService {
/// the hard upper bound of scrub time
utime_t deadline;
ScrubJob() {}
explicit ScrubJob(const spg_t& pg, const utime_t& timestamp, bool must = true);
explicit ScrubJob(const spg_t& pg, const utime_t& timestamp,
double pool_scrub_min_interval = 0,
double pool_scrub_max_interval = 0, bool must = true);
/// order the jobs by sched_time
bool operator<(const ScrubJob& rhs) const;
};
set<ScrubJob> sched_scrub_pg;

/// @returns the scrub_reg_stamp used to unregister the scrub job
utime_t reg_pg_scrub(spg_t pgid, utime_t t, bool must) {
ScrubJob scrub(pgid, t, must);
utime_t reg_pg_scrub(spg_t pgid, utime_t t, double pool_scrub_min_interval,
double pool_scrub_max_interval, bool must) {
ScrubJob scrub(pgid, t, pool_scrub_min_interval, pool_scrub_max_interval,
must);
Mutex::Locker l(sched_scrub_lock);
sched_scrub_pg.insert(scrub);
return scrub.sched_time;
Expand Down
Loading

0 comments on commit 424bcea

Please sign in to comment.