Skip to content

Commit

Permalink
sctp: Add GSO support
Browse files Browse the repository at this point in the history
SCTP has this pecualiarity that its packets cannot be just segmented to
(P)MTU. Its chunks must be contained in IP segments, padding respected.
So we can't just generate a big skb, set gso_size to the fragmentation
point and deliver it to IP layer.

This patch takes a different approach. SCTP will now build a skb as it
would be if it was received using GRO. That is, there will be a cover
skb with protocol headers and children ones containing the actual
segments, already segmented to a way that respects SCTP RFCs.

With that, we can tell skb_segment() to just split based on frag_list,
trusting its sizes are already in accordance.

This way SCTP can benefit from GSO and instead of passing several
packets through the stack, it can pass a single large packet.

v2:
- Added support for receiving GSO frames, as requested by Dave Miller.
- Clear skb->cb if packet is GSO (otherwise it's not used by SCTP)
- Added heuristics similar to what we have in TCP for not generating
  single GSO packets that fills cwnd.
v3:
- consider sctphdr size in skb_gso_transport_seglen()
- rebased due to 5c7cdf3 ("gso: Remove arbitrary checks for
  unsupported GSO")

Signed-off-by: Marcelo Ricardo Leitner <[email protected]>
Tested-by: Xin Long <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
marceloleitner authored and davem330 committed Jun 3, 2016
1 parent 3acb50c commit 90017ac
Show file tree
Hide file tree
Showing 14 changed files with 429 additions and 126 deletions.
7 changes: 5 additions & 2 deletions include/linux/netdev_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ enum {
* headers in software.
*/
NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
NETIF_F_GSO_SCTP_BIT,

NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
Expand Down Expand Up @@ -128,6 +129,7 @@ enum {
#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID)
#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL)
#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
Expand Down Expand Up @@ -166,7 +168,8 @@ enum {
NETIF_F_FSO)

/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO)
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \
NETIF_F_GSO_SCTP)

/*
* If one device supports one of these features, then enable them
Expand Down
1 change: 1 addition & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));

return (features & feature) == feature;
}
Expand Down
2 changes: 2 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ enum {
SKB_GSO_PARTIAL = 1 << 13,

SKB_GSO_TUNNEL_REMCSUM = 1 << 14,

SKB_GSO_SCTP = 1 << 15,
};

#if BITS_PER_LONG > 32
Expand Down
4 changes: 4 additions & 0 deletions include/net/sctp/sctp.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net);
int sctp_remaddr_proc_init(struct net *net);
void sctp_remaddr_proc_exit(struct net *net);

/*
* sctp/offload.c
*/
int sctp_offload_init(void);

/*
* Module global variables
Expand Down
5 changes: 5 additions & 0 deletions include/net/sctp/structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,9 @@ struct sctp_chunk {
/* This points to the sk_buff containing the actual data. */
struct sk_buff *skb;

/* In case of GSO packets, this will store the head one */
struct sk_buff *head_skb;

/* These are the SCTP headers by reverse order in a packet.
* Note that some of these may happen more than once. In that
* case, we point at the "current" one, whatever that means
Expand Down Expand Up @@ -696,6 +699,8 @@ struct sctp_packet {
size_t overhead;
/* This is the total size of all chunks INCLUDING padding. */
size_t size;
/* This is the maximum size this packet may have */
size_t max_size;

/* The packet is destined for this transport address.
* The function we finally use to pass down to the next lower
Expand Down
1 change: 1 addition & 0 deletions net/core/ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",

[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
Expand Down
3 changes: 3 additions & 0 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
Expand Down Expand Up @@ -4383,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
} else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
Expand Down
3 changes: 2 additions & 1 deletion net/sctp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o ssnmap.o auth.o
output.o input.o debug.o ssnmap.o auth.o \
offload.o

sctp_probe-y := probe.o

Expand Down
12 changes: 11 additions & 1 deletion net/sctp/input.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ int sctp_rcv(struct sk_buff *skb)
skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
else if (!sctp_checksum_disable &&
!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;

Expand Down Expand Up @@ -1175,6 +1177,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
{
sctp_chunkhdr_t *ch;

/* We do not allow GSO frames here as we need to linearize and
* then cannot guarantee frame boundaries. This shouldn't be an
* issue as packets hitting this are mostly INIT or INIT-ACK and
* those cannot be on GSO-style anyway.
*/
if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
return NULL;

if (skb_linearize(skb))
return NULL;

Expand Down
51 changes: 43 additions & 8 deletions net/sctp/inqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,17 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
if (chunk->singleton ||
chunk->end_of_packet ||
chunk->pdiscard) {
if (chunk->head_skb == chunk->skb) {
chunk->skb = skb_shinfo(chunk->skb)->frag_list;
goto new_skb;
}
if (chunk->skb->next) {
chunk->skb = chunk->skb->next;
goto new_skb;
}

if (chunk->head_skb)
chunk->skb = chunk->head_skb;
sctp_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
Expand All @@ -155,15 +166,15 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)

next_chunk:
/* Is the queue empty? */
if (list_empty(&queue->in_chunk_list))
entry = sctp_list_dequeue(&queue->in_chunk_list);
if (!entry)
return NULL;

entry = queue->in_chunk_list.next;
chunk = list_entry(entry, struct sctp_chunk, list);
list_del_init(entry);

/* Linearize if it's not GSO */
if (skb_is_nonlinear(chunk->skb)) {
if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
skb_is_nonlinear(chunk->skb)) {
if (skb_linearize(chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
Expand All @@ -174,15 +185,39 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
chunk->sctp_hdr = sctp_hdr(chunk->skb);
}

if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
/* GSO-marked skbs but without frags, handle
* them normally
*/
if (skb_shinfo(chunk->skb)->frag_list)
chunk->head_skb = chunk->skb;

/* skbs with "cover letter" */
if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len)
chunk->skb = skb_shinfo(chunk->skb)->frag_list;

if (WARN_ON(!chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
goto next_chunk;
}
}

if (chunk->asoc)
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);

queue->in_progress = chunk;

new_skb:
/* This is the first chunk in the packet. */
chunk->singleton = 1;
ch = (sctp_chunkhdr_t *) chunk->skb->data;
chunk->singleton = 1;
chunk->data_accepted = 0;

if (chunk->asoc)
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
chunk->pdiscard = 0;
chunk->auth = 0;
chunk->has_asconf = 0;
chunk->end_of_packet = 0;
chunk->ecn_ce_done = 0;
}

chunk->chunk_hdr = ch;
Expand Down
98 changes: 98 additions & 0 deletions net/sctp/offload.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* sctp_offload - GRO/GSO Offloading for SCTP
*
* Copyright (C) 2015, Marcelo Ricardo Leitner <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/socket.h>
#include <linux/sctp.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/time.h>
#include <net/net_namespace.h>

#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
#include <net/sctp/checksum.h>
#include <net/protocol.h>

static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
skb->ip_summed = CHECKSUM_NONE;
return sctp_compute_cksum(skb, skb_transport_offset(skb));
}

static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;

sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;

__skb_pull(skb, sizeof(*sh));

if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
struct skb_shared_info *pinfo = skb_shinfo(skb);
struct sk_buff *frag_iter;

pinfo->gso_segs = 0;
if (skb->len != skb->data_len) {
/* Means we have chunks in here too */
pinfo->gso_segs++;
}

skb_walk_frags(skb, frag_iter)
pinfo->gso_segs++;

segs = NULL;
goto out;
}

segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
if (IS_ERR(segs))
goto out;

/* All that is left is update SCTP CRC if necessary */
if (!(features & NETIF_F_SCTP_CRC)) {
for (skb = segs; skb; skb = skb->next) {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
sh = sctp_hdr(skb);
sh->checksum = sctp_gso_make_checksum(skb);
}
}
}

out:
return segs;
}

static const struct net_offload sctp_offload = {
.callbacks = {
.gso_segment = sctp_gso_segment,
},
};

int __init sctp_offload_init(void)
{
return inet_add_offload(&sctp_offload, IPPROTO_SCTP);
}
Loading

0 comments on commit 90017ac

Please sign in to comment.