Skip to content

Commit

Permalink
net: Introduce recvmmsg socket syscall
Browse files Browse the repository at this point in the history
Meaning receive multiple messages, reducing the number of syscalls and
net stack entry/exit operations.

Next patches will introduce mechanisms where protocols that want to
optimize this operation will provide an unlocked_recvmsg operation.

This takes into account comments made by:

. Paul Moore: sock_recvmsg is called only for the first datagram,
  sock_recvmsg_nosec is used for the rest.

. Caitlin Bestler: recvmmsg now has a struct timespec timeout, that
  works in the same fashion as the ppoll one.

  If the underlying protocol returns a datagram with MSG_OOB set, this
  will make recvmmsg return right away with as many datagrams (+ the OOB
  one) it has received so far.

. Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen
  datagrams and then recvmsg returns an error, recvmmsg will return
  the successfully received datagrams, store the error and return it
  in the next call.

This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg,
where we will be able to acquire the lock only at batch start and end, not at
every underlying recvmsg call.

Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
acmel authored and davem330 committed Oct 13, 2009
1 parent c05e85a commit a2e2725
Show file tree
Hide file tree
Showing 25 changed files with 261 additions and 50 deletions.
1 change: 1 addition & 0 deletions arch/alpha/kernel/systbls.S
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ sys_call_table:
.quad sys_signalfd
.quad sys_ni_syscall
.quad sys_eventfd
.quad sys_recvmmsg

.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
Expand Down
1 change: 1 addition & 0 deletions arch/arm/kernel/calls.S
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@
CALL(sys_pwritev)
CALL(sys_rt_tgsigqueueinfo)
CALL(sys_perf_event_open)
/* 365 */ CALL(sys_recvmmsg)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
Expand Down
1 change: 1 addition & 0 deletions arch/avr32/kernel/syscall_table.S
Original file line number Diff line number Diff line change
Expand Up @@ -295,4 +295,5 @@ sys_call_table:
.long sys_signalfd
.long sys_ni_syscall /* 280, was sys_timerfd */
.long sys_eventfd
.long sys_recvmmsg
.long sys_ni_syscall /* r8 is saturated at nr_syscalls */
1 change: 1 addition & 0 deletions arch/blackfin/mach-common/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table)
.long _sys_pwritev
.long _sys_rt_tgsigqueueinfo
.long _sys_perf_event_open
.long _sys_recvmmsg /* 370 */

.rept NR_syscalls-(.-_sys_call_table)/4
.long _sys_ni_syscall
Expand Down
1 change: 1 addition & 0 deletions arch/ia64/kernel/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -1806,6 +1806,7 @@ sys_call_table:
data8 sys_preadv
data8 sys_pwritev // 1320
data8 sys_rt_tgsigqueueinfo
data8 sys_recvmmsg

.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
1 change: 1 addition & 0 deletions arch/microblaze/kernel/syscall_table.S
Original file line number Diff line number Diff line change
Expand Up @@ -371,3 +371,4 @@ ENTRY(sys_call_table)
.long sys_ni_syscall
.long sys_rt_tgsigqueueinfo /* 365 */
.long sys_perf_event_open
.long sys_recvmmsg
1 change: 1 addition & 0 deletions arch/mips/kernel/scall32-o32.S
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ einval: li v0, -ENOSYS
sys sys_rt_tgsigqueueinfo 4
sys sys_perf_event_open 5
sys sys_accept4 4
sys sys_recvmmsg 5
.endm

/* We pre-compute the number of _instruction_ bytes needed to
Expand Down
1 change: 1 addition & 0 deletions arch/mips/kernel/scall64-64.S
Original file line number Diff line number Diff line change
Expand Up @@ -420,4 +420,5 @@ sys_call_table:
PTR sys_rt_tgsigqueueinfo
PTR sys_perf_event_open
PTR sys_accept4
PTR sys_recvmmsg
.size sys_call_table,.-sys_call_table
1 change: 1 addition & 0 deletions arch/mips/kernel/scall64-n32.S
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,5 @@ EXPORT(sysn32_call_table)
PTR compat_sys_rt_tgsigqueueinfo /* 5295 */
PTR sys_perf_event_open
PTR sys_accept4
PTR compat_sys_recvmmsg
.size sysn32_call_table,.-sysn32_call_table
1 change: 1 addition & 0 deletions arch/mips/kernel/scall64-o32.S
Original file line number Diff line number Diff line change
Expand Up @@ -538,4 +538,5 @@ sys_call_table:
PTR compat_sys_rt_tgsigqueueinfo
PTR sys_perf_event_open
PTR sys_accept4
PTR compat_sys_recvmmsg
.size sys_call_table,.-sys_call_table
1 change: 1 addition & 0 deletions arch/sh/kernel/syscalls_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,4 @@ sys_call_table:
.long sys_pwritev
.long sys_rt_tgsigqueueinfo
.long sys_perf_event_open
.long sys_recvmmsg /* 365 */
2 changes: 1 addition & 1 deletion arch/sparc/kernel/systbls_32.S
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,5 @@ sys_call_table:
/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg

4 changes: 2 additions & 2 deletions arch/sparc/kernel/systbls_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ sys_call_table32:
/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg

#endif /* CONFIG_COMPAT */

Expand Down Expand Up @@ -158,4 +158,4 @@ sys_call_table:
/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
1 change: 1 addition & 0 deletions arch/x86/ia32/ia32entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -832,4 +832,5 @@ ia32_sys_call_table:
.quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open
.quad compat_sys_recvmmsg
ia32_syscall_end:
3 changes: 2 additions & 1 deletion arch/x86/include/asm/unistd_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,10 +342,11 @@
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
#define __NR_recvmmsg 337

#ifdef __KERNEL__

#define NR_syscalls 337
#define NR_syscalls 338

#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/include/asm/unistd_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_event_open 298
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_recvmmsg 299
__SYSCALL(__NR_recvmmsg, sys_recvmmsg)

#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/syscall_table_32.S
Original file line number Diff line number Diff line change
Expand Up @@ -336,3 +336,4 @@ ENTRY(sys_call_table)
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
.long sys_recvmmsg
4 changes: 3 additions & 1 deletion arch/xtensa/include/asm/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3)
__SYSCALL(305, sys_ni_syscall, 0)
#define __NR_eventfd 306
__SYSCALL(306, sys_eventfd, 1)
#define __NR_recvmmsg 307
__SYSCALL(307, sys_recvmmsg, 5)

#define __NR_syscall_count 307
#define __NR_syscall_count 308

/*
* sysxtensa syscall handler
Expand Down
1 change: 1 addition & 0 deletions include/linux/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
#define SYS_ACCEPT4 18 /* sys_accept4(2) */
#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */

typedef enum {
SS_FREE = 0, /* not allocated */
Expand Down
10 changes: 10 additions & 0 deletions include/linux/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ struct msghdr {
unsigned msg_flags;
};

/* For recvmmsg/sendmmsg */
struct mmsghdr {
struct msghdr msg_hdr;
unsigned msg_len;
};

/*
* POSIX 1003.1g - ancillary data object information
* Ancillary data consits of a sequence of pairs of
Expand Down Expand Up @@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);

struct timespec;

extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
unsigned int flags, struct timespec *timeout);
#endif
#endif /* not kernel and not glibc */
#endif /* _LINUX_SOCKET_H */
4 changes: 4 additions & 0 deletions include/linux/syscalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct linux_dirent64;
struct list_head;
struct msgbuf;
struct msghdr;
struct mmsghdr;
struct msqid_ds;
struct new_utsname;
struct nfsctl_arg;
Expand Down Expand Up @@ -677,6 +678,9 @@ asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
struct sockaddr __user *, int __user *);
asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags);
asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
unsigned int vlen, unsigned flags,
struct timespec __user *timeout);
asmlinkage long sys_socket(int, int, int);
asmlinkage long sys_socketpair(int, int, int, int __user *);
asmlinkage long sys_socketcall(int call, unsigned long __user *args);
Expand Down
8 changes: 8 additions & 0 deletions include/net/compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ struct compat_msghdr {
compat_uint_t msg_flags;
};

struct compat_mmsghdr {
struct compat_msghdr msg_hdr;
compat_uint_t msg_len;
};

struct compat_cmsghdr {
compat_size_t cmsg_len;
compat_int_t cmsg_level;
Expand All @@ -35,6 +40,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *,
unsigned, unsigned,
struct timespec __user *);
extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *);
extern int put_cmsg_compat(struct msghdr*, int, int, int, void *);

Expand Down
2 changes: 2 additions & 0 deletions kernel/sys_ni.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ cond_syscall(sys_shutdown);
cond_syscall(sys_sendmsg);
cond_syscall(compat_sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(sys_recvmmsg);
cond_syscall(compat_sys_recvmsg);
cond_syscall(compat_sys_recvfrom);
cond_syscall(compat_sys_recvmmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
cond_syscall(compat_sys_futex);
Expand Down
33 changes: 30 additions & 3 deletions net/compat.c
Original file line number Diff line number Diff line change
Expand Up @@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);

/* Argument list sizes for compat_sys_socketcall */
#define AL(x) ((x) * sizeof(u32))
static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
AL(4)};
AL(4),AL(5)};
#undef AL

asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
Expand All @@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len,
return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen);
}

asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned vlen, unsigned int flags,
struct timespec __user *timeout)
{
int datagrams;
struct timespec ktspec;
struct compat_timespec __user *utspec =
(struct compat_timespec __user *)timeout;

if (get_user(ktspec.tv_sec, &utspec->tv_sec) ||
get_user(ktspec.tv_nsec, &utspec->tv_nsec))
return -EFAULT;

datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT, &ktspec);
if (datagrams > 0 &&
(put_user(ktspec.tv_sec, &utspec->tv_sec) ||
put_user(ktspec.tv_nsec, &utspec->tv_nsec)))
datagrams = -EFAULT;

return datagrams;
}

asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
{
int ret;
u32 a[6];
u32 a0, a1;

if (call < SYS_SOCKET || call > SYS_ACCEPT4)
if (call < SYS_SOCKET || call > SYS_RECVMMSG)
return -EINVAL;
if (copy_from_user(a, args, nas[call]))
return -EFAULT;
Expand Down Expand Up @@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
case SYS_RECVMSG:
ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break;
case SYS_RECVMMSG:
ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
compat_ptr(a[4]));
break;
case SYS_ACCEPT4:
ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
break;
Expand Down
Loading

0 comments on commit a2e2725

Please sign in to comment.