Skip to content

Commit

Permalink
Merge branch 'per-route-dctcp-receive-side'
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
tcp: receive-side per route dctcp handling

Original cover letter:

  Currently, the following case doesn't use DCTCP, even if it should:

    - responder has e.g. cubic as the system-wide default
    - 'ip route congctl dctcp $src' was set

  Then, DCTCP is NOT used if a DCTCP sender attempts to connect from a
  host in the $src range: ECT(0) is set, but listen_sk is not dctcp, so
  we fail the INET_ECN_is_not_ect sanity check.

  We also have to examine the dst used for the SYN/ACK reply to make
  this case work.

  In order to minimize additional cost, store the 'ECN is a must-have'
  information in the dst_features field.

  This set targets -next instead of -net since this doesn't seem to be a
  serious bug, and to give the change more soak time until it hits Linus'
  tree.

v1 -> v2:
 - Addressed Dave's feedback, not exposing any bits to user space
 - Added patch 3 to reject incorrect configurations
 - Rest as is, rebased and retested
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Aug 31, 2015
2 parents 87583eb + c3a8d94 commit 9dc3064
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 56 deletions.
6 changes: 6 additions & 0 deletions include/net/dst.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
p[metric-1] = val;
}

/*
 * Kernel-internal feature bits that are unallocated in user space.
 *
 * DST_FEATURE_ECN_CA occupies bit 31, so the constant has to be unsigned:
 * left-shifting a signed 1 into the sign bit of a 32-bit int is undefined
 * behaviour, and the resulting negative value would sign-extend whenever
 * it is widened.
 */
#define DST_FEATURE_ECN_CA	(1U << 31)

/* All kernel-internal feature bits. */
#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA)
/* Every feature bit, internal or user-visible, that requests ECN. */
#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN)

static inline u32
dst_feature(const struct dst_entry *dst, u32 feature)
{
Expand Down
2 changes: 1 addition & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;

struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
u32 tcp_ca_get_key_by_name(const char *name);
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
#ifdef CONFIG_INET
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
#else
Expand Down
11 changes: 7 additions & 4 deletions include/uapi/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,10 +418,13 @@ enum {

#define RTAX_MAX (__RTAX_MAX - 1)

#define RTAX_FEATURE_ECN 0x00000001
#define RTAX_FEATURE_SACK 0x00000002
#define RTAX_FEATURE_TIMESTAMP 0x00000004
#define RTAX_FEATURE_ALLFRAG 0x00000008
#define RTAX_FEATURE_ECN (1 << 0)
#define RTAX_FEATURE_SACK (1 << 1)
#define RTAX_FEATURE_TIMESTAMP (1 << 2)
#define RTAX_FEATURE_ALLFRAG (1 << 3)

#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \
RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG)

struct rta_session {
__u8 proto;
Expand Down
6 changes: 6 additions & 0 deletions net/core/rtnetlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,12 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
continue;
if (nla_put_string(skb, i + 1, name))
goto nla_put_failure;
} else if (i == RTAX_FEATURES - 1) {
u32 user_features = metrics[i] & RTAX_FEATURE_MASK;

BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
if (nla_put_u32(skb, i + 1, user_features))
goto nla_put_failure;
} else {
if (nla_put_u32(skb, i + 1, metrics[i]))
goto nla_put_failure;
Expand Down
77 changes: 47 additions & 30 deletions net/ipv4/fib_semantics.c
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,50 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
return true;
}

/*
 * fib_convert_metrics - copy the route metrics given in @cfg into @fi.
 *
 * Walks the nested metric attributes in cfg->fc_mx and stores each value
 * in fi->fib_metrics[].  RTAX_CC_ALGO carries a congestion control name,
 * which is translated to its key; every other metric is read as a u32.
 * RTAX_ADVMSS and RTAX_MTU are clamped, and RTAX_FEATURES may only carry
 * bits from RTAX_FEATURE_MASK.  If the congestion control lookup reports
 * that the algorithm needs ECN, the kernel-internal DST_FEATURE_ECN_CA
 * bit is set in the features metric.
 *
 * Returns 0 on success (including when no metrics were supplied), or
 * -EINVAL for an out-of-range metric type, a malformed u32 attribute, an
 * unknown congestion control name, or unsupported feature bits.
 */
static int
fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;

	if (!cfg->fc_mx)
		return 0;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (type > RTAX_MAX)
			return -EINVAL;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				return -EINVAL;
		} else {
			/*
			 * The nested attributes are supplied by user space;
			 * reject anything that is not exactly a u32 so that
			 * nla_get_u32() never reads beyond the payload.
			 */
			if (nla_len(nla) != sizeof(u32))
				return -EINVAL;
			val = nla_get_u32(nla);
		}
		if (type == RTAX_ADVMSS && val > 65535 - 40)
			val = 65535 - 40;
		if (type == RTAX_MTU && val > 65535 - 15)
			val = 65535 - 15;
		/* Kernel-internal feature bits must not come from user space. */
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			return -EINVAL;
		fi->fib_metrics[type - 1] = val;
	}

	/* Applied after the loop so a later RTAX_FEATURES cannot clear it. */
	if (ecn_ca)
		fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;

	return 0;
}

struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
Expand Down Expand Up @@ -948,36 +992,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
} endfor_nexthops(fi)

if (cfg->fc_mx) {
struct nlattr *nla;
int remaining;

nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);

if (type) {
u32 val;

if (type > RTAX_MAX)
goto err_inval;
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];

nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(tmp);
if (val == TCP_CA_UNSPEC)
goto err_inval;
} else {
val = nla_get_u32(nla);
}
if (type == RTAX_ADVMSS && val > 65535 - 40)
val = 65535 - 40;
if (type == RTAX_MTU && val > 65535 - 15)
val = 65535 - 15;
fi->fib_metrics[type - 1] = val;
}
}
}
err = fib_convert_metrics(fi, cfg);
if (err)
goto failure;

if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
Expand Down
9 changes: 6 additions & 3 deletions net/ipv4/tcp_cong.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,19 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);

u32 tcp_ca_get_key_by_name(const char *name)
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
{
const struct tcp_congestion_ops *ca;
u32 key;
u32 key = TCP_CA_UNSPEC;

might_sleep();

rcu_read_lock();
ca = __tcp_ca_find_autoload(name);
key = ca ? ca->key : TCP_CA_UNSPEC;
if (ca) {
key = ca->key;
*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
}
rcu_read_unlock();

return key;
Expand Down
7 changes: 5 additions & 2 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -6003,14 +6003,17 @@ static void tcp_ecn_create_request(struct request_sock *req,
const struct net *net = sock_net(listen_sk);
bool th_ecn = th->ece && th->cwr;
bool ect, ecn_ok;
u32 ecn_ok_dst;

if (!th_ecn)
return;

ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;

if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
(ecn_ok_dst & DST_FEATURE_ECN_CA))
inet_rsk(req)->ecn_ok = 1;
}

Expand Down
39 changes: 23 additions & 16 deletions net/ipv6/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1698,6 +1698,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
static int ip6_convert_metrics(struct mx6_config *mxc,
const struct fib6_config *cfg)
{
bool ecn_ca = false;
struct nlattr *nla;
int remaining;
u32 *mp;
Expand All @@ -1711,30 +1712,36 @@ static int ip6_convert_metrics(struct mx6_config *mxc,

nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
u32 val;

if (type) {
u32 val;
if (!type)
continue;
if (unlikely(type > RTAX_MAX))
goto err;

if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];

if (unlikely(type > RTAX_MAX))
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
goto err;
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
} else {
val = nla_get_u32(nla);
}
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
goto err;

nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(tmp);
if (val == TCP_CA_UNSPEC)
goto err;
} else {
val = nla_get_u32(nla);
}
mp[type - 1] = val;
__set_bit(type - 1, mxc->mx_valid);
}

mp[type - 1] = val;
__set_bit(type - 1, mxc->mx_valid);
}
if (ecn_ca) {
__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
}

mxc->mx = mp;

return 0;
err:
kfree(mp);
Expand Down

0 comments on commit 9dc3064

Please sign in to comment.