Skip to content

Commit

Permalink
Merge branch 'vsock-virtio' into main
Browse files Browse the repository at this point in the history
Luigi Leonardi says:

====================
vsock: avoid queuing on intermediate queue if possible

This series introduces an optimization for vsock/virtio to reduce latency
and increase the throughput: When the guest sends a packet to the host,
and the intermediate queue (send_pkt_queue) is empty, if there is enough
space, the packet is put directly in the virtqueue.

v3->v4
While running experiments on fio with 64B payload, I realized that there
was a mistake in my fio configuration, so I re-ran all the experiments
and now the latency numbers are indeed lower with the patch applied.
I also noticed that I was kicking the host without the lock.

- Fixed a configuration mistake on fio and re-ran all experiments.
- Fio latency measurement using 64B payload.
- virtio_transport_send_skb_fast_path sends kick with the tx_lock acquired
- Addressed all minor style changes requested by maintainer.
- Rebased on latest net-next
- Link to v3: https://lore.kernel.org/r/[email protected]

v2->v3
- Performed more experiments using iperf3 using multiple streams
- Handling of reply packets removed from virtio_transport_send_skb,
  as is needed just by the worker.
- Removed atomic_inc/atomic_sub when queuing directly to the vq.
- Introduced virtio_transport_send_skb_fast_path that handles the
  steps for sending on the vq.
- Fixed a missing mutex_unlock in error path.
- Changed authorship of the second commit
- Rebased on latest net-next

v1->v2
In this v2 I replaced a mutex_lock with a mutex_trylock because it was
insidea RCU critical section. I also added a check on tx_run, so if the
module is being removed the packet is not queued. I'd like to thank Stefano
for reporting the tx_run issue.

Applied all Stefano's suggestions:
    - Minor code style changes
    - Minor commit text rewrite
Performed more experiments:
     - Check if all the packets go directly to the vq (Matias' suggestion)
     - Used iperf3 to see if there is any improvement in overall throughput
      from guest to host
     - Pinned the vhost process to a pCPU.
     - Run fio using 512B payload
Rebased on latest net-next
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Aug 2, 2024
2 parents 5fe164f + 18ee44c commit 3361a6e
Show file tree
Hide file tree
Showing 10 changed files with 202 additions and 8 deletions.
4 changes: 3 additions & 1 deletion drivers/vhost/vsock.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
restart_tx = true;
}

consume_skb(skb);
virtio_transport_consume_skb_sent(skb, true);
}
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
if (added)
Expand Down Expand Up @@ -451,6 +451,8 @@ static struct virtio_transport vhost_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,

.unsent_bytes = virtio_transport_unsent_bytes,

.read_skb = virtio_transport_read_skb,
},

Expand Down
6 changes: 6 additions & 0 deletions include/linux/virtio_vsock.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ struct virtio_vsock_sock {
u32 tx_cnt;
u32 peer_fwd_cnt;
u32 peer_buf_alloc;
size_t bytes_unsent;

/* Protected by rx_lock */
u32 fwd_cnt;
Expand Down Expand Up @@ -193,6 +194,11 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);

ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk);

void virtio_transport_consume_skb_sent(struct sk_buff *skb,
bool consume);

int virtio_transport_do_socket_init(struct vsock_sock *vsk,
struct vsock_sock *psk);
int
Expand Down
3 changes: 3 additions & 0 deletions include/net/af_vsock.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ struct vsock_transport {
void (*notify_buffer_size)(struct vsock_sock *, u64 *);
int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);

/* SIOCOUTQ ioctl */
ssize_t (*unsent_bytes)(struct vsock_sock *vsk);

/* Shutdown. */
int (*shutdown)(struct vsock_sock *, int);

Expand Down
58 changes: 55 additions & 3 deletions net/vmw_vsock/af_vsock.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
#include <net/sock.h>
#include <net/af_vsock.h>
#include <uapi/linux/vm_sockets.h>
#include <uapi/asm-generic/ioctls.h>

static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
Expand Down Expand Up @@ -1292,6 +1293,57 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);

static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
int __user *arg)
{
struct sock *sk = sock->sk;
struct vsock_sock *vsk;
int ret;

vsk = vsock_sk(sk);

switch (cmd) {
case SIOCOUTQ: {
ssize_t n_bytes;

if (!vsk->transport || !vsk->transport->unsent_bytes) {
ret = -EOPNOTSUPP;
break;
}

if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
ret = -EINVAL;
break;
}

n_bytes = vsk->transport->unsent_bytes(vsk);
if (n_bytes < 0) {
ret = n_bytes;
break;
}

ret = put_user(n_bytes, arg);
break;
}
default:
ret = -ENOIOCTLCMD;
}

return ret;
}

static int vsock_ioctl(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
int ret;

lock_sock(sock->sk);
ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
release_sock(sock->sk);

return ret;
}

static const struct proto_ops vsock_dgram_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
Expand All @@ -1302,7 +1354,7 @@ static const struct proto_ops vsock_dgram_ops = {
.accept = sock_no_accept,
.getname = vsock_getname,
.poll = vsock_poll,
.ioctl = sock_no_ioctl,
.ioctl = vsock_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
.sendmsg = vsock_dgram_sendmsg,
Expand Down Expand Up @@ -2286,7 +2338,7 @@ static const struct proto_ops vsock_stream_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
.ioctl = sock_no_ioctl,
.ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
Expand All @@ -2308,7 +2360,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
.ioctl = sock_no_ioctl,
.ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
Expand Down
4 changes: 3 additions & 1 deletion net/vmw_vsock/virtio_transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ static void virtio_transport_tx_work(struct work_struct *work)

virtqueue_disable_cb(vq);
while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
consume_skb(skb);
virtio_transport_consume_skb_sent(skb, true);
added = true;
}
} while (!virtqueue_enable_cb(vq));
Expand Down Expand Up @@ -540,6 +540,8 @@ static struct virtio_transport virtio_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,

.unsent_bytes = virtio_transport_unsent_bytes,

.read_skb = virtio_transport_read_skb,
},

Expand Down
35 changes: 35 additions & 0 deletions net/vmw_vsock/virtio_transport_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,26 @@ void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);

void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
{
struct sock *s = skb->sk;

if (s && skb->len) {
struct vsock_sock *vs = vsock_sk(s);
struct virtio_vsock_sock *vvs;

vvs = vs->trans;

spin_lock_bh(&vvs->tx_lock);
vvs->bytes_unsent -= skb->len;
spin_unlock_bh(&vvs->tx_lock);
}

if (consume)
consume_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);

u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
u32 ret;
Expand All @@ -475,6 +495,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
if (ret > credit)
ret = credit;
vvs->tx_cnt += ret;
vvs->bytes_unsent += ret;
spin_unlock_bh(&vvs->tx_lock);

return ret;
Expand All @@ -488,6 +509,7 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)

spin_lock_bh(&vvs->tx_lock);
vvs->tx_cnt -= credit;
vvs->bytes_unsent -= credit;
spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
Expand Down Expand Up @@ -1090,6 +1112,19 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
size_t ret;

spin_lock_bh(&vvs->tx_lock);
ret = vvs->bytes_unsent;
spin_unlock_bh(&vvs->tx_lock);

return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);

static int virtio_transport_reset(struct vsock_sock *vsk,
struct sk_buff *skb)
{
Expand Down
6 changes: 6 additions & 0 deletions net/vmw_vsock/vsock_loopback.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ static struct virtio_transport loopback_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,

.unsent_bytes = virtio_transport_unsent_bytes,

.read_skb = virtio_transport_read_skb,
},

Expand All @@ -123,6 +125,10 @@ static void vsock_loopback_work(struct work_struct *work)
spin_unlock_bh(&vsock->pkt_queue.lock);

while ((skb = __skb_dequeue(&pkts))) {
/* Decrement the bytes_unsent counter without deallocating skb
* It is freed by the receiver.
*/
virtio_transport_consume_skb_sent(skb, false);
virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&loopback_transport, skb);
}
Expand Down
6 changes: 3 additions & 3 deletions tools/testing/vsock/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
}

/* Connect to <cid, port> and return the file descriptor. */
static int vsock_connect(unsigned int cid, unsigned int port, int type)
int vsock_connect(unsigned int cid, unsigned int port, int type)
{
union {
struct sockaddr sa;
Expand Down Expand Up @@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type)
/* Listen on <cid, port> and return the first incoming connection. The remote
* address is stored to clientaddrp. clientaddrp may be NULL.
*/
static int vsock_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp, int type)
int vsock_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp, int type)
{
union {
struct sockaddr sa;
Expand Down
3 changes: 3 additions & 0 deletions tools/testing/vsock/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
unsigned int parse_port(const char *str);
int vsock_connect(unsigned int cid, unsigned int port, int type);
int vsock_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp, int type);
int vsock_stream_connect(unsigned int cid, unsigned int port);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
Expand Down
85 changes: 85 additions & 0 deletions tools/testing/vsock/vsock_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <sys/mman.h>
#include <poll.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

#include "vsock_test_zerocopy.h"
#include "timeout.h"
Expand Down Expand Up @@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
}
}

#define MSG_BUF_IOCTL_LEN 64
static void test_unsent_bytes_server(const struct test_opts *opts, int type)
{
unsigned char buf[MSG_BUF_IOCTL_LEN];
int client_fd;

client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
if (client_fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
}

recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf));
control_writeln("RECEIVED");

close(client_fd);
}

static void test_unsent_bytes_client(const struct test_opts *opts, int type)
{
unsigned char buf[MSG_BUF_IOCTL_LEN];
int ret, fd, sock_bytes_unsent;

fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
}

for (int i = 0; i < sizeof(buf); i++)
buf[i] = rand() & 0xFF;

send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
control_expectln("RECEIVED");

ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
if (ret < 0) {
if (errno == EOPNOTSUPP) {
fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
} else {
perror("ioctl");
exit(EXIT_FAILURE);
}
} else if (ret == 0 && sock_bytes_unsent != 0) {
fprintf(stderr,
"Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
sock_bytes_unsent);
exit(EXIT_FAILURE);
}

close(fd);
}

static void test_stream_unsent_bytes_client(const struct test_opts *opts)
{
test_unsent_bytes_client(opts, SOCK_STREAM);
}

static void test_stream_unsent_bytes_server(const struct test_opts *opts)
{
test_unsent_bytes_server(opts, SOCK_STREAM);
}

static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts)
{
test_unsent_bytes_client(opts, SOCK_SEQPACKET);
}

static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
{
test_unsent_bytes_server(opts, SOCK_SEQPACKET);
}

#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128)
/* This define is the same as in 'include/linux/virtio_vsock.h':
* it is used to decide when to send credit update message during
Expand Down Expand Up @@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = {
.run_client = test_stream_rcvlowat_def_cred_upd_client,
.run_server = test_stream_cred_upd_on_low_rx_bytes,
},
{
.name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes",
.run_client = test_stream_unsent_bytes_client,
.run_server = test_stream_unsent_bytes_server,
},
{
.name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes",
.run_client = test_seqpacket_unsent_bytes_client,
.run_server = test_seqpacket_unsent_bytes_server,
},
{},
};

Expand Down

0 comments on commit 3361a6e

Please sign in to comment.