Skip to content

Commit

Permalink
sctp: Implement quick failover draft from tsvwg
Browse files Browse the repository at this point in the history
I've seen several attempts recently made to do quick failover of sctp transports
by reducing various retransmit timers and counters.  While its possible to
implement a faster failover on multihomed sctp associations, its not
particularly robust, in that it can lead to unneeded retransmits, as well as
false connection failures due to intermittent latency on a network.

Instead, lets implement the new ietf quick failover draft found here:
http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05

This will let the sctp stack identify transports that have had a small number of
errors, and avoid using them quickly until their reliability can be
re-established.  I've tested this out on two virt guests connected via multiple
isolated virt networks and believe its in compliance with the above draft and
works well.

Signed-off-by: Neil Horman <[email protected]>
CC: Vlad Yasevich <[email protected]>
CC: Sridhar Samudrala <[email protected]>
CC: "David S. Miller" <[email protected]>
CC: [email protected]
CC: [email protected]
Acked-by: Vlad Yasevich <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
nhorman authored and davem330 committed Jul 22, 2012
1 parent e390648 commit 5aa93bc
Show file tree
Hide file tree
Showing 10 changed files with 221 additions and 15 deletions.
14 changes: 14 additions & 0 deletions Documentation/networking/ip-sysctl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1440,6 +1440,20 @@ path_max_retrans - INTEGER

Default: 5

pf_retrans - INTEGER
The number of retransmissions that will be attempted on a given path
before traffic is redirected to an alternate transport (should one
exist). Note this is distinct from path_max_retrans, as a path that
passes the pf_retrans threshold can still be used. Its only
deprioritized when a transmission path is selected by the stack. This
setting is primarily used to enable fast failover mechanisms without
having to reduce path_max_retrans to a very low value. See:
http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
for details. Note also that a value of pf_retrans > path_max_retrans
disables this feature

Default: 0

rto_initial - INTEGER
The initial round trip timeout value in milliseconds that will be used
in calculating round trip times. This is the initial time interval
Expand Down
1 change: 1 addition & 0 deletions include/net/sctp/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ typedef enum {
typedef enum {
SCTP_TRANSPORT_UP,
SCTP_TRANSPORT_DOWN,
SCTP_TRANSPORT_PF,
} sctp_transport_cmd_t;

/* These are the address scopes defined mainly for IPv4 addresses
Expand Down
20 changes: 19 additions & 1 deletion include/net/sctp/structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ extern struct sctp_globals {
int max_retrans_path;
int max_retrans_init;

/* Potentially-Failed.Max.Retrans sysctl value
* taken from:
* http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05
*/
int pf_retrans;

/*
* Policy for preforming sctp/socket accounting
* 0 - do socket level accounting, all assocs share sk_sndbuf
Expand Down Expand Up @@ -258,6 +264,7 @@ extern struct sctp_globals {
#define sctp_sndbuf_policy (sctp_globals.sndbuf_policy)
#define sctp_rcvbuf_policy (sctp_globals.rcvbuf_policy)
#define sctp_max_retrans_path (sctp_globals.max_retrans_path)
#define sctp_pf_retrans (sctp_globals.pf_retrans)
#define sctp_max_retrans_init (sctp_globals.max_retrans_init)
#define sctp_sack_timeout (sctp_globals.sack_timeout)
#define sctp_hb_interval (sctp_globals.hb_interval)
Expand Down Expand Up @@ -990,10 +997,15 @@ struct sctp_transport {

/* This is the max_retrans value for the transport and will
* be initialized from the assocs value. This can be changed
* using SCTP_SET_PEER_ADDR_PARAMS socket option.
* using the SCTP_SET_PEER_ADDR_PARAMS socket option.
*/
__u16 pathmaxrxt;

/* This is the partially failed retrans value for the transport
* and will be initialized from the assocs value. This can be changed
* using the SCTP_PEER_ADDR_THLDS socket option
*/
int pf_retrans;
/* PMTU : The current known path MTU. */
__u32 pathmtu;

Expand Down Expand Up @@ -1664,6 +1676,12 @@ struct sctp_association {
*/
int max_retrans;

/* This is the partially failed retrans value for the transport
* and will be initialized from the assocs value. This can be
* changed using the SCTP_PEER_ADDR_THLDS socket option
*/
int pf_retrans;

/* Maximum number of times the endpoint will retransmit INIT */
__u16 max_init_attempts;

Expand Down
11 changes: 11 additions & 0 deletions include/net/sctp/user.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */
#define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */
#define SCTP_AUTO_ASCONF 30
#define SCTP_PEER_ADDR_THLDS 31

/* Internal Socket Options. Some of the sctp library functions are
* implemented using these socket options.
Expand Down Expand Up @@ -649,6 +650,7 @@ struct sctp_paddrinfo {
*/
enum sctp_spinfo_state {
SCTP_INACTIVE,
SCTP_PF,
SCTP_ACTIVE,
SCTP_UNCONFIRMED,
SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */
Expand Down Expand Up @@ -741,4 +743,13 @@ typedef struct {
int sd;
} sctp_peeloff_arg_t;

/*
* Peer Address Thresholds socket option
*/
struct sctp_paddrthlds {
sctp_assoc_t spt_assoc_id;
struct sockaddr_storage spt_address;
__u16 spt_pathmaxrxt;
__u16 spt_pathpfthld;
};
#endif /* __net_sctp_user_h__ */
37 changes: 30 additions & 7 deletions net/sctp/associola.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
* socket values.
*/
asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
asoc->pf_retrans = sctp_pf_retrans;

asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min);
Expand Down Expand Up @@ -686,6 +688,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* Set the path max_retrans. */
peer->pathmaxrxt = asoc->pathmaxrxt;

/* And the partial failure retrnas threshold */
peer->pf_retrans = asoc->pf_retrans;

/* Initialize the peer's SACK delay timeout based on the
* association configured value.
*/
Expand Down Expand Up @@ -841,6 +846,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
struct sctp_ulpevent *event;
struct sockaddr_storage addr;
int spc_state = 0;
bool ulp_notify = true;

/* Record the transition on the transport. */
switch (command) {
Expand All @@ -854,6 +860,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
spc_state = SCTP_ADDR_CONFIRMED;
else
spc_state = SCTP_ADDR_AVAILABLE;
/* Don't inform ULP about transition from PF to
* active state and set cwnd to 1, see SCTP
* Quick failover draft section 5.1, point 5
*/
if (transport->state == SCTP_PF) {
ulp_notify = false;
transport->cwnd = 1;
}
transport->state = SCTP_ACTIVE;
break;

Expand All @@ -872,19 +886,27 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
spc_state = SCTP_ADDR_UNREACHABLE;
break;

case SCTP_TRANSPORT_PF:
transport->state = SCTP_PF;
ulp_notify = false;
break;

default:
return;
}

/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
* user.
*/
memset(&addr, 0, sizeof(struct sockaddr_storage));
memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len);
event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
0, spc_state, error, GFP_ATOMIC);
if (event)
sctp_ulpq_tail_event(&asoc->ulpq, event);
if (ulp_notify) {
memset(&addr, 0, sizeof(struct sockaddr_storage));
memcpy(&addr, &transport->ipaddr,
transport->af_specific->sockaddr_len);
event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
0, spc_state, error, GFP_ATOMIC);
if (event)
sctp_ulpq_tail_event(&asoc->ulpq, event);
}

/* Select new active and retran paths. */

Expand All @@ -900,7 +922,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
transports) {

if ((t->state == SCTP_INACTIVE) ||
(t->state == SCTP_UNCONFIRMED))
(t->state == SCTP_UNCONFIRMED) ||
(t->state == SCTP_PF))
continue;
if (!first || t->last_time_heard > first->last_time_heard) {
second = first;
Expand Down
6 changes: 4 additions & 2 deletions net/sctp/outqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
if (!new_transport)
new_transport = asoc->peer.active_path;
} else if ((new_transport->state == SCTP_INACTIVE) ||
(new_transport->state == SCTP_UNCONFIRMED)) {
(new_transport->state == SCTP_UNCONFIRMED) ||
(new_transport->state == SCTP_PF)) {
/* If the chunk is Heartbeat or Heartbeat Ack,
* send it to chunk->transport, even if it's
* inactive.
Expand Down Expand Up @@ -987,7 +988,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
new_transport = chunk->transport;
if (!new_transport ||
((new_transport->state == SCTP_INACTIVE) ||
(new_transport->state == SCTP_UNCONFIRMED)))
(new_transport->state == SCTP_UNCONFIRMED) ||
(new_transport->state == SCTP_PF)))
new_transport = asoc->peer.active_path;
if (new_transport->state == SCTP_UNCONFIRMED)
continue;
Expand Down
33 changes: 29 additions & 4 deletions net/sctp/sm_sideeffect.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
sctp_cmd_seq_t *commands,
gfp_t gfp);

static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds,
struct sctp_transport *t);
/********************************************************************
* Helper functions
********************************************************************/
Expand Down Expand Up @@ -470,7 +472,8 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
* notification SHOULD be sent to the upper layer.
*
*/
static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
struct sctp_association *asoc,
struct sctp_transport *transport,
int is_hb)
{
Expand All @@ -495,6 +498,23 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
transport->error_count++;
}

/* If the transport error count is greater than the pf_retrans
* threshold, and less than pathmaxrtx, then mark this transport
* as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1,
* point 1
*/
if ((transport->state != SCTP_PF) &&
(asoc->pf_retrans < transport->pathmaxrxt) &&
(transport->error_count > asoc->pf_retrans)) {

sctp_assoc_control_transport(asoc, transport,
SCTP_TRANSPORT_PF,
0);

/* Update the hb timer to resend a heartbeat every rto */
sctp_cmd_hb_timer_update(commands, transport);
}

if (transport->state != SCTP_INACTIVE &&
(transport->error_count > transport->pathmaxrxt)) {
SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p",
Expand Down Expand Up @@ -699,6 +719,10 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
SCTP_HEARTBEAT_SUCCESS);
}

if (t->state == SCTP_PF)
sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP,
SCTP_HEARTBEAT_SUCCESS);

/* The receiver of the HEARTBEAT ACK should also perform an
* RTT measurement for that destination transport address
* using the time value carried in the HEARTBEAT ACK chunk.
Expand Down Expand Up @@ -1565,8 +1589,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,

case SCTP_CMD_STRIKE:
/* Mark one strike against a transport. */
sctp_do_8_2_transport_strike(asoc, cmd->obj.transport,
0);
sctp_do_8_2_transport_strike(commands, asoc,
cmd->obj.transport, 0);
break;

case SCTP_CMD_TRANSPORT_IDLE:
Expand All @@ -1576,7 +1600,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,

case SCTP_CMD_TRANSPORT_HB_SENT:
t = cmd->obj.transport;
sctp_do_8_2_transport_strike(asoc, t, 1);
sctp_do_8_2_transport_strike(commands, asoc,
t, 1);
t->hb_sent = 1;
break;

Expand Down
Loading

0 comments on commit 5aa93bc

Please sign in to comment.