Skip to content

Commit

Permalink
Merge branch 'dropcount'
Browse files Browse the repository at this point in the history
Eyal Birger says:

====================
net: move skb->dropcount to skb->cb[]

Commit 9777500 ("af_packet: add interframe drop cmsg (v6)")
unionized skb->mark and skb->dropcount in order to allow recording
of the socket drop count while maintaining struct sk_buff size.

skb->dropcount was introduced since there was no available room
in skb->cb[] in packet sockets. However, its introduction led to
the inability to export skb->mark to userspace.

It was considered to alias skb->priority instead of skb->mark.
However, that would lead to the inabilty to export skb->priority
to userspace if desired. Such change may also lead to hard-to-find
issues as skb->priority is assumed to be alias free, and, as noted
by Shmulik Ladkani, is not 'naturally orthogonal' with other skb
fields.

This patch series follows the suggestions made by Eric Dumazet moving
the dropcount metric to skb->cb[], eliminating this problem
at the expense of 4 bytes less in skb->cb[] for protocol families
using it.

The patch series include compactization of bluetooth and packet
use of skb->cb[] as well as the infrastructure for placing dropcount
in skb->cb[].
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Mar 2, 2015
2 parents 287f3a9 + 744d5a3 commit 6556c38
Show file tree
Hide file tree
Showing 18 changed files with 79 additions and 48 deletions.
2 changes: 0 additions & 2 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @napi_id: id of the NAPI struct this skb came from
* @secmark: security marking
* @mark: Generic packet mark
* @dropcount: total number of sk_receive_queue overflows
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
* @inner_protocol: Protocol (encapsulation)
Expand Down Expand Up @@ -641,7 +640,6 @@ struct sk_buff {
#endif
union {
__u32 mark;
__u32 dropcount;
__u32 reserved_tailroom;
};

Expand Down
14 changes: 5 additions & 9 deletions include/net/bluetooth/bluetooth.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,21 +275,17 @@ struct hci_dev;

typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);

struct hci_req_ctrl {
bool start;
u8 event;
hci_req_complete_t complete;
};

struct bt_skb_cb {
__u8 pkt_type;
__u8 incoming;
__u8 force_active;
__u16 opcode;
__u16 expect;
__u8 force_active;
__u8 incoming:1;
__u8 req_start:1;
u8 req_event;
hci_req_complete_t req_complete;
struct l2cap_chan *chan;
struct l2cap_ctrl control;
struct hci_req_ctrl req;
bdaddr_t bdaddr;
__le16 psm;
};
Expand Down
23 changes: 23 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -2078,6 +2078,29 @@ static inline int sock_intr_errno(long timeo)
return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
}

struct sock_skb_cb {
u32 dropcount;
};

/* Store sock_skb_cb at the end of skb->cb[] so protocol families
* using skb->cb[] would keep using it directly and utilize its
* alignement guarantee.
*/
#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
sizeof(struct sock_skb_cb)))

#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
SOCK_SKB_CB_OFFSET))

#define sock_skb_cb_check_size(size) \
BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)

static inline void
sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
{
SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
}

void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
Expand Down
3 changes: 1 addition & 2 deletions net/bluetooth/af_bluetooth.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,10 +711,9 @@ EXPORT_SYMBOL_GPL(bt_debugfs);

static int __init bt_init(void)
{
struct sk_buff *skb;
int err;

BUILD_BUG_ON(sizeof(struct bt_skb_cb) > sizeof(skb->cb));
sock_skb_cb_check_size(sizeof(struct bt_skb_cb));

BT_INFO("Core ver %s", VERSION);

Expand Down
12 changes: 6 additions & 6 deletions net/bluetooth/hci_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -3517,7 +3517,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
bt_cb(skb)->req.start = true;
bt_cb(skb)->req_start = 1;

skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
Expand Down Expand Up @@ -4195,7 +4195,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
if (!skb)
return true;

return bt_cb(skb)->req.start;
return bt_cb(skb)->req_start;
}

static void hci_resend_last(struct hci_dev *hdev)
Expand Down Expand Up @@ -4255,14 +4255,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
* command queue (hdev->cmd_q).
*/
if (hdev->sent_cmd) {
req_complete = bt_cb(hdev->sent_cmd)->req.complete;
req_complete = bt_cb(hdev->sent_cmd)->req_complete;

if (req_complete) {
/* We must set the complete callback to NULL to
* avoid calling the callback more than once if
* this function gets called again.
*/
bt_cb(hdev->sent_cmd)->req.complete = NULL;
bt_cb(hdev->sent_cmd)->req_complete = NULL;

goto call_complete;
}
Expand All @@ -4271,12 +4271,12 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
/* Remove all pending commands belonging to this request */
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
while ((skb = __skb_dequeue(&hdev->cmd_q))) {
if (bt_cb(skb)->req.start) {
if (bt_cb(skb)->req_start) {
__skb_queue_head(&hdev->cmd_q, skb);
break;
}

req_complete = bt_cb(skb)->req.complete;
req_complete = bt_cb(skb)->req_complete;
kfree_skb(skb);
}
spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
Expand Down
4 changes: 2 additions & 2 deletions net/bluetooth/hci_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -3106,7 +3106,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
cancel_delayed_work(&hdev->cmd_timer);

if (ev->status ||
(hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
(hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req_event))
hci_req_cmd_complete(hdev, opcode, ev->status);

if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
Expand Down Expand Up @@ -5039,7 +5039,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)

skb_pull(skb, HCI_EVENT_HDR_SIZE);

if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req_event == event) {
struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
u16 opcode = __le16_to_cpu(cmd_hdr->opcode);

Expand Down
6 changes: 3 additions & 3 deletions net/bluetooth/hci_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
return -ENODATA;

skb = skb_peek_tail(&req->cmd_q);
bt_cb(skb)->req.complete = complete;
bt_cb(skb)->req_complete = complete;

spin_lock_irqsave(&hdev->cmd_q.lock, flags);
skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
Expand Down Expand Up @@ -116,9 +116,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
}

if (skb_queue_empty(&req->cmd_q))
bt_cb(skb)->req.start = true;
bt_cb(skb)->req_start = 1;

bt_cb(skb)->req.event = event;
bt_cb(skb)->req_event = event;

skb_queue_tail(&req->cmd_q, skb);
}
Expand Down
2 changes: 1 addition & 1 deletion net/bluetooth/hci_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
bt_cb(skb)->req.start = true;
bt_cb(skb)->req_start = 1;

skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
Expand Down
2 changes: 1 addition & 1 deletion net/can/bcm.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* containing the interface index.
*/

BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
Expand Down
6 changes: 3 additions & 3 deletions net/can/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ struct raw_sock {
*/
static inline unsigned int *raw_flags(struct sk_buff *skb)
{
BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) +
sizeof(unsigned int)));
sock_skb_cb_check_size(sizeof(struct sockaddr_can) +
sizeof(unsigned int));

/* return pointer after struct sockaddr_can */
return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
Expand Down Expand Up @@ -135,7 +135,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
* containing the interface index.
*/

BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
Expand Down
2 changes: 1 addition & 1 deletion net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
skb_dst_force(skb);

spin_lock_irqsave(&list->lock, flags);
skb->dropcount = atomic_read(&sk->sk_drops);
sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);

Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,7 @@ static int __init inet_init(void)
struct list_head *r;
int rc = -EINVAL;

BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
sock_skb_cb_check_size(sizeof(struct inet_skb_parm));

rc = proto_register(&tcp_prot, 1);
if (rc)
Expand Down
3 changes: 1 addition & 2 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3005,12 +3005,11 @@ static void __init tcp_init_mem(void)

void __init tcp_init(void)
{
struct sk_buff *skb = NULL;
unsigned long limit;
int max_rshare, max_wshare, cnt;
unsigned int i;

BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));

percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/af_inet6.c
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ static int __init inet6_init(void)
struct list_head *r;
int err = 0;

BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));

/* Register the socket-side information for inet6_create. */
for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
Expand Down
35 changes: 26 additions & 9 deletions net/packet/af_packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,16 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
static void packet_flush_mclist(struct sock *sk);

struct packet_skb_cb {
unsigned int origlen;
union {
struct sockaddr_pkt pkt;
struct sockaddr_ll ll;
union {
/* Trick: alias skb original length with
* ll.sll_family and ll.protocol in order
* to save room.
*/
unsigned int origlen;
struct sockaddr_ll ll;
};
} sa;
};

Expand Down Expand Up @@ -1810,13 +1816,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
skb = nskb;
}

BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
sizeof(skb->cb));
sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);

sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
if (unlikely(po->origdev))
sll->sll_ifindex = orig_dev->ifindex;
Expand All @@ -1825,7 +1828,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,

sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

PACKET_SKB_CB(skb)->origlen = skb->len;
/* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
* Use their space for storing the original skb length.
*/
PACKET_SKB_CB(skb)->sa.origlen = skb->len;

if (pskb_trim(skb, snaplen))
goto drop_n_acct;
Expand All @@ -1839,7 +1845,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,

spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
skb->dropcount = atomic_read(&sk->sk_drops);
sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
Expand Down Expand Up @@ -2883,6 +2889,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sk_buff *skb;
int copied, err;
int vnet_hdr_len = 0;
unsigned int origlen = 0;

err = -EINVAL;
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
Expand Down Expand Up @@ -2982,6 +2989,15 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
if (err)
goto out_free;

if (sock->type != SOCK_PACKET) {
struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;

/* Original length was stored in sockaddr_ll fields */
origlen = PACKET_SKB_CB(skb)->sa.origlen;
sll->sll_family = AF_PACKET;
sll->sll_protocol = skb->protocol;
}

sock_recv_ts_and_drops(msg, sk, skb);

if (msg->msg_name) {
Expand All @@ -2993,6 +3009,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_namelen = sizeof(struct sockaddr_pkt);
} else {
struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;

msg->msg_namelen = sll->sll_halen +
offsetof(struct sockaddr_ll, sll_addr);
}
Expand All @@ -3006,7 +3023,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
aux.tp_status = TP_STATUS_USER;
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
aux.tp_len = PACKET_SKB_CB(skb)->origlen;
aux.tp_len = origlen;
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
aux.tp_net = skb_network_offset(skb);
Expand Down
2 changes: 1 addition & 1 deletion net/rxrpc/ar-recvmsg.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
&call->conn->trans->peer->srx, len);
msg->msg_namelen = len;
}
sock_recv_ts_and_drops(msg, &rx->sk, skb);
sock_recv_timestamp(msg, &rx->sk, skb);
}

/* receive the message */
Expand Down
3 changes: 1 addition & 2 deletions net/sctp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1322,8 +1322,7 @@ static __init int sctp_init(void)
int max_share;
int order;

BUILD_BUG_ON(sizeof(struct sctp_ulpevent) >
sizeof(((struct sk_buff *) 0)->cb));
sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));

/* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
Expand Down
4 changes: 2 additions & 2 deletions net/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -731,9 +731,9 @@ EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
sizeof(__u32), &skb->dropcount);
sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
}

void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
Expand Down

0 comments on commit 6556c38

Please sign in to comment.