Skip to content

Commit

Permalink
libceph, ceph: get and handle cluster maps with addrvecs
Browse files Browse the repository at this point in the history
In preparation for msgr2, make the cluster send us maps with addrvecs
including both LEGACY and MSGR2 addrs instead of a single LEGACY addr.
This means advertising support for SERVER_NAUTILUS and also some older
features: SERVER_MIMIC, MONENC and MONNAMES.

MONNAMES and MONENC are actually pre-argonaut; we just never updated
ceph_monmap_decode() for them.  Decoding is unconditional, see commit
23c625c ("libceph: assume argonaut on the server side").

SERVER_MIMIC doesn't bear any meaning for the kernel client.

Since ceph_decode_entity_addrvec() is guarded by encoding version
checks (and in the msgr2 case it is guarded implicitly by the fact that
the server is speaking msgr2), we assume MSG_ADDR2 encoding for it.

Signed-off-by: Ilya Dryomov <[email protected]>
  • Loading branch information
idryomov committed Dec 14, 2020
1 parent 8921f25 commit a5cbd5f
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 72 deletions.
2 changes: 1 addition & 1 deletion fs/ceph/mds_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -5014,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
return;
}

newmap = ceph_mdsmap_decode(&p, end);
newmap = ceph_mdsmap_decode(&p, end, false);
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad_unlock;
Expand Down
21 changes: 11 additions & 10 deletions fs/ceph/mdsmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ static int __decode_and_drop_compat_set(void **p, void* end)
* Ignore any fields we don't care about (there are quite a few of
* them).
*/
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_mdsmap *m;
const void *start = *p;
Expand Down Expand Up @@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
namelen = ceph_decode_32(p); /* skip mds name */
*p += namelen;

ceph_decode_need(p, end,
4*sizeof(u32) + sizeof(u64) +
sizeof(addr) + sizeof(struct ceph_timespec),
bad);
mds = ceph_decode_32(p);
inc = ceph_decode_32(p);
state = ceph_decode_32(p);
ceph_decode_32_safe(p, end, mds, bad);
ceph_decode_32_safe(p, end, inc, bad);
ceph_decode_32_safe(p, end, state, bad);
*p += sizeof(u64); /* state_seq */
err = ceph_decode_entity_addr(p, end, &addr);
if (info_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
err = ceph_decode_entity_addr(p, end, &addr);
if (err)
goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));

ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
bad);
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
Expand Down
11 changes: 8 additions & 3 deletions include/linux/ceph/ceph_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
* feature. Base case is 1 (first use).
*/
#define CEPH_FEATURE_INCARNATION_1 (0ull)
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC

#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
Expand Down Expand Up @@ -75,7 +76,7 @@
DEFINE_CEPH_FEATURE( 0, 1, UID)
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)

DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
Expand Down Expand Up @@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
DEFINE_CEPH_FEATURE(29, 1, MDSENC)
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
Expand Down Expand Up @@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_SERVER_NAUTILUS | \
CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \
CEPH_FEATURE_MONENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_SERVER_LUMINOUS | \
CEPH_FEATURE_RESEND_ON_SPLIT | \
Expand All @@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
CEPH_FEATURE_SERVER_MIMIC | \
CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \
Expand Down
4 changes: 4 additions & 0 deletions include/linux/ceph/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
*/
#define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)

static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{
Expand All @@ -239,6 +240,9 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)

extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr);
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr);

/*
* encoders
*/
Expand Down
2 changes: 1 addition & 1 deletion include/linux/ceph/mdsmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
}

extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);

Expand Down
4 changes: 2 additions & 2 deletions include/linux/ceph/osdmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
}

struct ceph_osdmap *ceph_osdmap_alloc(void);
extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);

Expand Down
56 changes: 56 additions & 0 deletions net/ceph/decode.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/ceph/decode.h>

Expand Down Expand Up @@ -82,3 +83,58 @@ ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
}
EXPORT_SYMBOL(ceph_decode_entity_addr);

/*
 * Decode an address vector and return the single address of the
 * desired type (MSGR2 if @msgr2 is true, LEGACY otherwise).
 *
 * Assume encoding with MSG_ADDR2: the vector starts with a marker
 * byte that must be 2, followed by a 32-bit address count and that
 * many encoded entity addrs.
 *
 * @p:     in/out decode cursor, advanced past everything consumed
 * @end:   end of the buffer being decoded
 * @msgr2: which address type to pick out of the vector
 * @addr:  filled in with the matching address; left untouched if the
 *         vector is empty
 *
 * Returns 0 on success (including an empty vector), -EINVAL if the
 * buffer is truncated, the marker is wrong or more than one address of
 * the desired type is present, -ENOENT if a non-empty vector holds no
 * address of the desired type, or the error returned by
 * ceph_decode_entity_addr().
 */
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
			       struct ceph_entity_addr *addr)
{
	__le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 :
				 CEPH_ENTITY_ADDR_TYPE_LEGACY;
	struct ceph_entity_addr tmp_addr;
	/*
	 * The count is an unsigned 32-bit field on the wire.  Keeping it
	 * (and the index) unsigned means an absurdly large count is not
	 * mistaken for a negative number and reported as "no match";
	 * instead the per-address bounds checks reject it once the
	 * buffer runs out.
	 */
	u32 addr_cnt;
	bool found;
	u8 marker;
	int ret;
	u32 i;

	ceph_decode_8_safe(p, end, marker, e_inval);
	if (marker != 2) {
		pr_err("bad addrvec marker %d\n", marker);
		return -EINVAL;
	}

	ceph_decode_32_safe(p, end, addr_cnt, e_inval);

	found = false;
	for (i = 0; i < addr_cnt; i++) {
		ret = ceph_decode_entity_addr(p, end, &tmp_addr);
		if (ret)
			return ret;

		if (tmp_addr.type == my_type) {
			/* exactly one address of each type is expected */
			if (found) {
				pr_err("another match of type %d in addrvec\n",
				       le32_to_cpu(my_type));
				return -EINVAL;
			}

			memcpy(addr, &tmp_addr, sizeof(*addr));
			found = true;
		}
	}
	/* an empty vector is accepted; a non-empty one must have a match */
	if (!found && addr_cnt != 0) {
		pr_err("no match of type %d in addrvec\n",
		       le32_to_cpu(my_type));
		return -ENOENT;
	}

	return 0;

e_inval:
	return -EINVAL;
}
EXPORT_SYMBOL(ceph_decode_entity_addrvec);
145 changes: 105 additions & 40 deletions net/ceph/mon_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,57 +36,122 @@ static const struct ceph_connection_operations mon_con_ops;

static int __validate_auth(struct ceph_mon_client *monc);

/*
 * Decode one mon_info_t entry and extract the monitor address of the
 * requested type.
 *
 * @p:     in/out decode cursor; on success it is moved to the end of
 *         the mon_info_t struct, skipping any trailing fields that
 *         are not decoded here
 * @end:   end of the enclosing buffer
 * @msgr2: passed through to ceph_decode_entity_addrvec() to select
 *         the MSGR2 or LEGACY address
 * @addr:  filled in by ceph_decode_entity_addrvec()
 *
 * Returns 0 on success, -EINVAL if the struct does not fit in the
 * buffer or its contents overrun the advertised struct length, or the
 * error returned by ceph_start_decoding() /
 * ceph_decode_entity_addrvec().
 */
static int decode_mon_info(void **p, void *end, bool msgr2,
			   struct ceph_entity_addr *addr)
{
	void *mon_info_end;
	u32 struct_len;
	u8 struct_v;
	int ret;

	ret = ceph_start_decoding(p, end, 1, "mon_info_t", &struct_v,
				  &struct_len);
	if (ret)
		return ret;

	/*
	 * Make sure the advertised payload fits in the buffer so that
	 * mon_info_end is a valid bound for everything decoded below.
	 */
	ceph_decode_need(p, end, struct_len, e_inval);
	mon_info_end = *p + struct_len;

	/*
	 * Bound the nested decode by the struct's own end rather than
	 * the outer buffer: a malformed entry must not eat into the
	 * following record and then have the cursor rewound over it.
	 */
	ceph_decode_skip_string(p, mon_info_end, e_inval);  /* skip mon name */
	ret = ceph_decode_entity_addrvec(p, mon_info_end, msgr2, addr);
	if (ret)
		return ret;

	*p = mon_info_end;
	return 0;

e_inval:
	return -EINVAL;
}

/*
* Decode a monmap blob (e.g., during mount).
*
* Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC).
*/
static struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_monmap *m = NULL;
int i, err = -EINVAL;
struct ceph_monmap *monmap = NULL;
struct ceph_fsid fsid;
u32 epoch, num_mon;
u32 len;
u32 struct_len;
int blob_len;
int num_mon;
u8 struct_v;
u32 epoch;
int ret;
int i;

ceph_decode_32_safe(p, end, blob_len, e_inval);
ceph_decode_need(p, end, blob_len, e_inval);

ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len);
if (ret)
goto fail;

dout("%s struct_v %d\n", __func__, struct_v);
ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval);
ceph_decode_32_safe(p, end, epoch, e_inval);
if (struct_v >= 6) {
u32 feat_struct_len;
u8 feat_struct_v;

ceph_decode_32_safe(&p, end, len, bad);
ceph_decode_need(&p, end, len, bad);
*p += sizeof(struct ceph_timespec); /* skip last_changed */
*p += sizeof(struct ceph_timespec); /* skip created */

dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p));
p += sizeof(u16); /* skip version */
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
&feat_struct_v, &feat_struct_len);
if (ret)
goto fail;

ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
ceph_decode_copy(&p, &fsid, sizeof(fsid));
epoch = ceph_decode_32(&p);
*p += feat_struct_len; /* skip persistent_features */

num_mon = ceph_decode_32(&p);
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
&feat_struct_v, &feat_struct_len);
if (ret)
goto fail;

*p += feat_struct_len; /* skip optional_features */
}
ceph_decode_32_safe(p, end, num_mon, e_inval);

dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch,
num_mon);
if (num_mon > CEPH_MAX_MON)
goto bad;
m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS);
if (m == NULL)
return ERR_PTR(-ENOMEM);
m->fsid = fsid;
m->epoch = epoch;
m->num_mon = num_mon;
for (i = 0; i < num_mon; ++i) {
struct ceph_entity_inst *inst = &m->mon_inst[i];

/* copy name portion */
ceph_decode_copy_safe(&p, end, &inst->name,
sizeof(inst->name), bad);
err = ceph_decode_entity_addr(&p, end, &inst->addr);
if (err)
goto bad;
goto e_inval;

monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO);
if (!monmap) {
ret = -ENOMEM;
goto fail;
}
dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
m->num_mon);
for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i,
ceph_pr_addr(&m->mon_inst[i].addr));
return m;
bad:
dout("monmap_decode failed with %d\n", err);
kfree(m);
return ERR_PTR(err);
monmap->fsid = fsid;
monmap->epoch = epoch;
monmap->num_mon = num_mon;

/* legacy_mon_addr map or mon_info map */
for (i = 0; i < num_mon; i++) {
struct ceph_entity_inst *inst = &monmap->mon_inst[i];

ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);

if (struct_v >= 6)
ret = decode_mon_info(p, end, msgr2, &inst->addr);
else
ret = ceph_decode_entity_addr(p, end, &inst->addr);
if (ret)
goto fail;

dout("%s mon%d addr %s\n", __func__, i,
ceph_pr_addr(&inst->addr));
}

return monmap;

e_inval:
ret = -EINVAL;
fail:
kfree(monmap);
return ERR_PTR(ret);
}

/*
Expand Down Expand Up @@ -476,7 +541,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
p = msg->front.iov_base;
end = p + msg->front.iov_len;

monmap = ceph_monmap_decode(p, end);
monmap = ceph_monmap_decode(&p, end, false);
if (IS_ERR(monmap)) {
pr_err("problem decoding monmap, %d\n",
(int)PTR_ERR(monmap));
Expand Down
4 changes: 2 additions & 2 deletions net/ceph/osd_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -3918,9 +3918,9 @@ static int handle_one_map(struct ceph_osd_client *osdc,
set_pool_was_full(osdc);

if (incremental)
newmap = osdmap_apply_incremental(&p, end, osdc->osdmap);
newmap = osdmap_apply_incremental(&p, end, false, osdc->osdmap);
else
newmap = ceph_osdmap_decode(&p, end);
newmap = ceph_osdmap_decode(&p, end, false);
if (IS_ERR(newmap))
return PTR_ERR(newmap);

Expand Down
Loading

0 comments on commit a5cbd5f

Please sign in to comment.