Skip to content

Commit

Permalink
tipc: change socket buffer overflow control to respect sk_rcvbuf
Browse files Browse the repository at this point in the history
As per feedback from the netdev community, we change the buffer
overflow protection algorithm in receiving sockets so that it
always respects the nominal upper limit set in sk_rcvbuf.

Instead of scaling up from a small sk_rcvbuf value, which leads to
violation of the configured sk_rcvbuf limit, we now calculate the
weighted per-message limit by scaling down from a much bigger value,
still in the same field, according to the importance priority of the
received message.

To allow for administrative tunability of the socket receive buffer
size, we create a tipc_rmem sysctl variable to allow the user to
configure an even bigger value via sysctl command.  It is a vector of
three values (min/default/max) to be consistent with things like tcp_rmem.

By default, the value initialized in tipc_rmem[1] is equal to the
receive socket size needed by a TIPC_CRITICAL_IMPORTANCE message.
This value is also set as the default value of sk_rcvbuf.

Originally-by: Jon Maloy <[email protected]>
Cc: Neil Horman <[email protected]>
Cc: Jon Maloy <[email protected]>
[Ying: added sysctl variation to Jon's original patch]
Signed-off-by: Ying Xue <[email protected]>
[PG: don't compile sysctl.c if not config'd; add Documentation]
Signed-off-by: Paul Gortmaker <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
ying-xue authored and davem330 committed Jun 17, 2013
1 parent 8941bbc commit cc79dd1
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 12 deletions.
17 changes: 16 additions & 1 deletion Documentation/sysctl/net.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Table : Subdirectories in /proc/sys/net
ipv4 IP version 4 x25 X.25 protocol
ipx IPX token-ring IBM token ring
bridge Bridging decnet DEC net
ipv6 IP version 6
ipv6 IP version 6 tipc TIPC
..............................................................................

1. /proc/sys/net/core - Network core options
Expand Down Expand Up @@ -207,3 +207,18 @@ IPX.
The /proc/net/ipx_route table holds a list of IPX routes. For each route it
gives the destination network, the router node (or Directly) and the network
address of the router (or Connected) for internal networks.

6. TIPC
-------------------------------------------------------

The TIPC protocol now has a tunable for the receive memory, similar to the
tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)

# cat /proc/sys/net/tipc/tipc_rmem
4252725 34021800 68043600
#

The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
are scaled (shifted) versions of that same value. Note that the min value
is currently not used in any meaningful way, but the triplet is
preserved in order to be consistent with things like tcp_rmem.
1 change: 1 addition & 0 deletions net/tipc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ tipc-y += addr.o bcast.o bearer.o config.o \
socket.o log.o eth_media.o

tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
12 changes: 10 additions & 2 deletions net/tipc/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "name_table.h"
#include "subscr.h"
#include "config.h"
#include "port.h"

#include <linux/module.h>

Expand All @@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly;
int tipc_max_ports __read_mostly;
int tipc_net_id __read_mostly;
int tipc_remote_management __read_mostly;

int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */

/**
* tipc_buf_acquire - creates a TIPC message buffer
Expand Down Expand Up @@ -118,6 +119,7 @@ static void tipc_core_stop(void)
tipc_nametbl_stop();
tipc_ref_table_stop();
tipc_socket_stop();
tipc_unregister_sysctl();
}

/**
Expand All @@ -142,13 +144,14 @@ static int tipc_core_start(void)
res = tipc_netlink_start();
if (!res)
res = tipc_socket_init();
if (!res)
res = tipc_register_sysctl();
if (res)
tipc_core_stop();

return res;
}


static int __init tipc_init(void)
{
int res;
Expand All @@ -160,6 +163,11 @@ static int __init tipc_init(void)
tipc_max_ports = CONFIG_TIPC_PORTS;
tipc_net_id = 4711;

sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE;
sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 <<
TIPC_CRITICAL_IMPORTANCE;
sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT;

res = tipc_core_start();
if (res)
pr_err("Unable to start in single node mode\n");
Expand Down
9 changes: 9 additions & 0 deletions net/tipc/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly;
extern int tipc_max_ports __read_mostly;
extern int tipc_net_id __read_mostly;
extern int tipc_remote_management __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;

/*
* Other global variables
Expand All @@ -97,6 +98,14 @@ extern void tipc_netlink_stop(void);
extern int tipc_socket_init(void);
extern void tipc_socket_stop(void);

#ifdef CONFIG_SYSCTL
extern int tipc_register_sysctl(void);
extern void tipc_unregister_sysctl(void);
#else
/*
 * Stubs used when the kernel is built without CONFIG_SYSCTL (sysctl.o is
 * then not compiled).  They are real inline functions rather than macros
 * so that callers remain type-checked in both configurations.
 */

/* Nothing to register; always reports success (0). */
static inline int tipc_register_sysctl(void)
{
	return 0;
}

/* Nothing was registered, so there is nothing to undo. */
static inline void tipc_unregister_sysctl(void)
{
}
#endif

/*
* TIPC timer and signal code
*/
Expand Down
2 changes: 2 additions & 0 deletions net/tipc/port.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#include "node_subscr.h"

#define TIPC_FLOW_CONTROL_WIN 512
/*
 * Receive queue overload limit: (2 * TIPC_FLOW_CONTROL_WIN + 1) times
 * SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE).  tipc_init() uses this value as
 * the initial tipc_rmem max and derives the min/default entries from it
 * by shifting.
 */
#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))

typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref,
struct sk_buff **buf, unsigned char const *data,
Expand Down
19 changes: 10 additions & 9 deletions net/tipc/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */

#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */

struct tipc_sock {
Expand Down Expand Up @@ -203,6 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,

sock_init_data(sock, sk);
sk->sk_backlog_rcv = backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
tipc_sk(sk)->p = tp_ptr;
Expand Down Expand Up @@ -1233,10 +1232,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
* For all connectionless messages, the default queue limits are
* as follows:
*
* TIPC_LOW_IMPORTANCE (5MB)
* TIPC_MEDIUM_IMPORTANCE (10MB)
* TIPC_HIGH_IMPORTANCE (20MB)
* TIPC_CRITICAL_IMPORTANCE (40MB)
* TIPC_LOW_IMPORTANCE (4 MB)
* TIPC_MEDIUM_IMPORTANCE (8 MB)
* TIPC_HIGH_IMPORTANCE (16 MB)
* TIPC_CRITICAL_IMPORTANCE (32 MB)
*
* Returns overload limit according to corresponding message importance
*/
Expand All @@ -1246,9 +1245,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
unsigned int limit;

if (msg_connected(msg))
limit = CONN_OVERLOAD_LIMIT;
limit = sysctl_tipc_rmem[2];
else
limit = sk->sk_rcvbuf << (msg_importance(msg) + 5);
limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
msg_importance(msg);
return limit;
}

Expand Down Expand Up @@ -1847,7 +1847,8 @@ static const struct net_proto_family tipc_family_ops = {
static struct proto tipc_proto = {
.name = "TIPC",
.owner = THIS_MODULE,
.obj_size = sizeof(struct tipc_sock)
.obj_size = sizeof(struct tipc_sock),
.sysctl_rmem = sysctl_tipc_rmem
};

/**
Expand Down
64 changes: 64 additions & 0 deletions net/tipc/sysctl.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* net/tipc/sysctl.c: sysctl interface to TIPC subsystem
*
* Copyright (c) 2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include "core.h"

#include <linux/sysctl.h>

/*
 * Handle returned by register_net_sysctl() in tipc_register_sysctl();
 * kept so that tipc_unregister_sysctl() can remove the table again.
 */
static struct ctl_table_header *tipc_ctl_hdr;

static struct ctl_table tipc_table[] = {
{
.procname = "tipc_rmem",
.data = &sysctl_tipc_rmem,
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{}
};

/**
 * tipc_register_sysctl - register the TIPC sysctl table
 *
 * Registers tipc_table under "net/tipc" for init_net and stores the
 * resulting header in tipc_ctl_hdr.
 *
 * Returns 0 on success, or -ENOMEM if registration returned no header.
 */
int tipc_register_sysctl(void)
{
	tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table);

	return tipc_ctl_hdr ? 0 : -ENOMEM;
}

/**
 * tipc_unregister_sysctl - remove the TIPC sysctl table
 *
 * Hands the header saved in tipc_ctl_hdr to unregister_net_sysctl_table().
 */
void tipc_unregister_sysctl(void)
{
	struct ctl_table_header *hdr = tipc_ctl_hdr;

	unregister_net_sysctl_table(hdr);
}

0 comments on commit cc79dd1

Please sign in to comment.