Daniel Borkmann says:

====================
pull-request: bpf 2021-11-16

We've added 12 non-merge commits during the last 5 day(s) which contain
a total of 23 files changed, 573 insertions(+), 73 deletions(-).

The main changes are:

1) Fix pruning regression where the verifier went overly conservative and rejected
   previously accepted programs, from Alexei Starovoitov and Lorenz Bauer.

2) Fix verifier TOCTOU bug when using read-only map's values as constant
   scalars during verification, from Daniel Borkmann.

3) Fix a crash due to a double free in XSK's buffer pool, from Magnus Karlsson.

4) Fix libbpf regression when cross-building runqslower, from Jean-Philippe Brucker.

5) Forbid use of bpf_ktime_get_coarse_ns() and bpf_timer_*() helpers in tracing
   programs due to deadlock possibilities, from Dmitrii Banshchikov.

6) Fix checksum validation in sockmap's udp_read_sock() callback, from Cong Wang.

7) Various BPF sample fixes such as XDP stats in xdp_sample_user, from Alexander Lobakin.

8) Fix libbpf gen_loader error handling wrt fd cleanup, from Kumar Kartikeya Dwivedi.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  udp: Validate checksum in udp_read_sock()
  bpf: Fix toctou on read-only map's constant scalar tracking
  samples/bpf: Fix build error due to -isystem removal
  selftests/bpf: Add tests for restricted helpers
  bpf: Forbid bpf_ktime_get_coarse_ns and bpf_timer_* in tracing progs
  libbpf: Perform map fd cleanup for gen_loader in case of error
  samples/bpf: Fix incorrect use of strlen in xdp_redirect_cpu
  tools/runqslower: Fix cross-build
  samples/bpf: Fix summary per-sec stats in xdp_sample_user
  selftests/bpf: Check map in map pruning
  bpf: Fix inner map state pruning regression.
  xsk: Fix crash on double free in buffer pool
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
kuba-moo committed Nov 17, 2021
2 parents 848e5d6 + 099f896 commit f083ec3
Showing 23 changed files with 573 additions and 73 deletions.
3 changes: 2 additions & 1 deletion include/linux/bpf.h
@@ -193,7 +193,7 @@ struct bpf_map {
 	atomic64_t usercnt;
 	struct work_struct work;
 	struct mutex freeze_mutex;
-	u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
+	atomic64_t writecnt;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -1419,6 +1419,7 @@ void bpf_map_put(struct bpf_map *map);
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
+bool bpf_map_write_active(const struct bpf_map *map);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 int generic_map_lookup_batch(struct bpf_map *map,
 			     const union bpf_attr *attr,
2 changes: 2 additions & 0 deletions kernel/bpf/cgroup.c
@@ -1809,6 +1809,8 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sysctl_get_new_value_proto;
 	case BPF_FUNC_sysctl_set_new_value:
 		return &bpf_sysctl_set_new_value_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return cgroup_base_func_proto(func_id, prog);
 	}
2 changes: 0 additions & 2 deletions kernel/bpf/helpers.c
@@ -1364,8 +1364,6 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
-	case BPF_FUNC_ktime_get_coarse_ns:
-		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_ringbuf_output:
 		return &bpf_ringbuf_output_proto;
 	case BPF_FUNC_ringbuf_reserve:
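This removal pairs with the per-program-type additions in kernel/bpf/cgroup.c above and in net/core/filter.c and net/ipv4/bpf_tcp_ca.c below: bpf_ktime_get_coarse_ns() leaves the shared base list and becomes opt-in. A sketch of the resulting dispatch shape (some_func_proto() is a stand-in name, not a kernel symbol):

static const struct bpf_func_proto *
some_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
        switch (func_id) {
        case BPF_FUNC_ktime_get_coarse_ns:
                return &bpf_ktime_get_coarse_ns_proto; /* safe for this prog type */
        default:
                return bpf_base_func_proto(func_id);   /* no longer hands it out */
        }
}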
57 changes: 36 additions & 21 deletions kernel/bpf/syscall.c
@@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 	return map;
 }
 
+static void bpf_map_write_active_inc(struct bpf_map *map)
+{
+	atomic64_inc(&map->writecnt);
+}
+
+static void bpf_map_write_active_dec(struct bpf_map *map)
+{
+	atomic64_dec(&map->writecnt);
+}
+
+bool bpf_map_write_active(const struct bpf_map *map)
+{
+	return atomic64_read(&map->writecnt) != 0;
+}
+
 static u32 bpf_map_value_size(const struct bpf_map *map)
 {
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
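The three helpers above replace the freeze_mutex-protected writecnt with a lock-free counter, so syscall-side writers can advertise an in-flight write without taking the mutex. A minimal sketch of the bracketing pattern that the write-capable map commands below all follow (illustrative only; do_map_write() is a placeholder, not a kernel function):

static int map_write_command(union bpf_attr *attr)
{
        struct fd f = fdget(attr->map_fd);
        struct bpf_map *map = __bpf_map_get(f);
        int err;

        if (IS_ERR(map))
                return PTR_ERR(map);

        bpf_map_write_active_inc(map); /* visible to map_freeze() and the verifier */
        err = do_map_write(map, attr); /* placeholder for the actual update/delete */
        bpf_map_write_active_dec(map); /* dropped on every exit path */

        fdput(f);
        return err;
}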
@@ -601,23 +616,17 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma)
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	if (vma->vm_flags & VM_MAYWRITE) {
-		mutex_lock(&map->freeze_mutex);
-		map->writecnt++;
-		mutex_unlock(&map->freeze_mutex);
-	}
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_inc(map);
 }
 
 /* called for all unmapped memory region (including initial) */
 static void bpf_map_mmap_close(struct vm_area_struct *vma)
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	if (vma->vm_flags & VM_MAYWRITE) {
-		mutex_lock(&map->freeze_mutex);
-		map->writecnt--;
-		mutex_unlock(&map->freeze_mutex);
-	}
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_dec(map);
 }
 
 static const struct vm_operations_struct bpf_map_default_vmops = {
@@ -668,7 +677,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
 		goto out;
 
 	if (vma->vm_flags & VM_MAYWRITE)
-		map->writecnt++;
+		bpf_map_write_active_inc(map);
 out:
 	mutex_unlock(&map->freeze_mutex);
 	return err;
@@ -1139,6 +1148,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
@@ -1174,6 +1184,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 free_key:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1196,6 +1207,7 @@ static int map_delete_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
@@ -1226,6 +1238,7 @@ static int map_delete_elem(union bpf_attr *attr)
 out:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1533,6 +1546,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
 	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
@@ -1597,6 +1611,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 free_key:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1624,8 +1639,7 @@ static int map_freeze(const union bpf_attr *attr)
 	}
 
 	mutex_lock(&map->freeze_mutex);
-
-	if (map->writecnt) {
+	if (bpf_map_write_active(map)) {
 		err = -EBUSY;
 		goto err_put;
 	}
@@ -4171,6 +4185,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
 			    union bpf_attr __user *uattr,
 			    int cmd)
 {
+	bool has_read = cmd == BPF_MAP_LOOKUP_BATCH ||
+			cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
+	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
 	struct bpf_map *map;
 	int err, ufd;
 	struct fd f;
@@ -4183,16 +4200,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if ((cmd == BPF_MAP_LOOKUP_BATCH ||
-	     cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
-	    !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+	if (has_write)
+		bpf_map_write_active_inc(map);
+	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
 		err = -EPERM;
 		goto err_put;
 	}
-
-	if (cmd != BPF_MAP_LOOKUP_BATCH &&
-	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -4205,8 +4219,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
 		BPF_DO_BATCH(map->ops->map_update_batch);
 	else
 		BPF_DO_BATCH(map->ops->map_delete_batch);
-
 err_put:
+	if (has_write)
+		bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
27 changes: 25 additions & 2 deletions kernel/bpf/verifier.c
@@ -1151,7 +1151,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
 		/* transfer reg's id which is unique for every map_lookup_elem
 		 * as UID of the inner map.
 		 */
-		reg->map_uid = reg->id;
+		if (map_value_has_timer(map->inner_map_meta))
+			reg->map_uid = reg->id;
 	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 		reg->type = PTR_TO_XDP_SOCK;
 	} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -4055,7 +4056,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 
 static bool bpf_map_is_rdonly(const struct bpf_map *map)
 {
-	return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+	/* A map is considered read-only if the following conditions are true:
+	 *
+	 * 1) BPF program side cannot change any of the map content. The
+	 *    BPF_F_RDONLY_PROG flag is set at map creation time and cannot
+	 *    change throughout the map's lifetime.
+	 * 2) The map value(s) have been initialized from user space by a
+	 *    loader and then "frozen", such that no new map update/delete
+	 *    operations from syscall side are possible for the rest of
+	 *    the map's lifetime from that point onwards.
+	 * 3) Any parallel/pending map update/delete operations from syscall
+	 *    side have been completed. Only after that point, it's safe to
+	 *    assume that map value(s) are immutable.
+	 */
+	return (map->map_flags & BPF_F_RDONLY_PROG) &&
+	       READ_ONCE(map->frozen) &&
+	       !bpf_map_write_active(map);
 }
 
 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
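The comment spells out why "frozen" alone was not enough: freezing is checked once at verification time, while a write that started before the freeze can still be in flight. A hedged user-space sketch of that window (setup and names are illustrative, not part of the commit):

#include <linux/bpf.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <unistd.h>

static long sys_bpf(int cmd, union bpf_attr *attr)
{
        return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

/* Thread A: an update issued just before the freeze may still be running
 * inside the kernel when thread B proceeds.
 */
static void *writer(void *arg)
{
        union bpf_attr *attr = arg; /* pre-filled BPF_MAP_UPDATE_ELEM attr */

        sys_bpf(BPF_MAP_UPDATE_ELEM, attr);
        return NULL;
}

/* Thread B: freezes the map, then loads a program whose verification folds
 * a value from that map into a constant scalar. Without the
 * bpf_map_write_active() test, the fold could race with thread A's
 * still-pending write.
 */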
@@ -11631,6 +11647,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		}
 	}
 
+	if (map_value_has_timer(map)) {
+		if (is_tracing_prog_type(prog_type)) {
+			verbose(env, "tracing progs cannot use bpf_timer yet\n");
+			return -EINVAL;
+		}
+	}
+
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
 	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
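For reference, a sketch of the kind of program the new check rejects (assumes libbpf's bpf_helpers.h; the map layout and attach point are illustrative): a tracing program that references a map whose value embeds struct bpf_timer now fails to load, since the timer helpers take locks that the traced context may already hold.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct elem {
        struct bpf_timer t;
};

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, struct elem);
} timer_map SEC(".maps");

SEC("kprobe/hrtimer_start_range_ns")
int rejected(void *ctx)
{
        int key = 0;

        /* Referencing a timer-bearing map from a tracing program is now
         * refused with: "tracing progs cannot use bpf_timer yet"
         */
        bpf_map_lookup_elem(&timer_map, &key);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";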
2 changes: 0 additions & 2 deletions kernel/trace/bpf_trace.c
@@ -1111,8 +1111,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
-	case BPF_FUNC_ktime_get_coarse_ns:
-		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_current_pid_tgid:
6 changes: 6 additions & 0 deletions net/core/filter.c
@@ -7162,6 +7162,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #endif
 	case BPF_FUNC_sk_storage_get:
 		return &bpf_sk_storage_get_cg_sock_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -10327,6 +10329,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
 		return &sk_reuseport_load_bytes_relative_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_ptr_cookie_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -10833,6 +10837,8 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_skc_to_unix_sock:
 		func = &bpf_skc_to_unix_sock_proto;
 		break;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
2 changes: 2 additions & 0 deletions net/ipv4/bpf_tcp_ca.c
@@ -205,6 +205,8 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
 			    offsetof(struct tcp_congestion_ops, release))
 			return &bpf_sk_getsockopt_proto;
 		return NULL;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
11 changes: 11 additions & 0 deletions net/ipv4/udp.c
@@ -1807,6 +1807,17 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		skb = skb_recv_udp(sk, 0, 1, &err);
 		if (!skb)
 			return err;
+
+		if (udp_lib_checksum_complete(skb)) {
+			__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+					IS_UDPLITE(sk));
+			__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+					IS_UDPLITE(sk));
+			atomic_inc(&sk->sk_drops);
+			kfree_skb(skb);
+			continue;
+		}
+
 		used = recv_actor(desc, skb, 0, skb->len);
 		if (used <= 0) {
 			if (!copied)
7 changes: 5 additions & 2 deletions net/xdp/xsk_buff_pool.c
@@ -500,7 +500,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
 		pool->free_list_cnt--;
 		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
 					free_list_node);
-		list_del(&xskb->free_list_node);
+		list_del_init(&xskb->free_list_node);
 	}
 
 	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
@@ -568,7 +568,7 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
 	i = nb_entries;
 	while (i--) {
 		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
-		list_del(&xskb->free_list_node);
+		list_del_init(&xskb->free_list_node);
 
 		*xdp = &xskb->xdp;
 		xdp++;
@@ -615,6 +615,9 @@ EXPORT_SYMBOL(xp_can_alloc);
 
 void xp_free(struct xdp_buff_xsk *xskb)
 {
+	if (!list_empty(&xskb->free_list_node))
+		return;
+
 	xskb->pool->free_list_cnt++;
 	list_add(&xskb->free_list_node, &xskb->pool->free_list);
 }
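The list_del() to list_del_init() switch is what makes the new guard in xp_free() work: list_del_init() leaves the removed node pointing at itself, so list_empty() on the node answers "is this buffer currently off the free list?", and a second xp_free() of the same buffer becomes a no-op. A minimal user-space sketch of the idiom (mock list ops, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static bool list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *n, struct list_head *head)
{
        n->next = head->next;
        n->prev = head;
        head->next->prev = n;
        head->next = n;
}

static void list_del_init(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        INIT_LIST_HEAD(n); /* node now reports list_empty() == true */
}

int main(void)
{
        struct list_head free_list, node;

        INIT_LIST_HEAD(&free_list);
        INIT_LIST_HEAD(&node);

        list_add(&node, &free_list); /* first "free": node goes on the list */
        if (!list_empty(&node))      /* second "free": caught, made a no-op */
                printf("double free avoided\n");
        return 0;
}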
2 changes: 0 additions & 2 deletions samples/bpf/hbm_kern.h
@@ -9,8 +9,6 @@
  * Include file for sample Host Bandwidth Manager (HBM) BPF programs
  */
 #define KBUILD_MODNAME "foo"
-#include <stddef.h>
-#include <stdbool.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/if_packet.h>
5 changes: 2 additions & 3 deletions samples/bpf/xdp_redirect_cpu_user.c
@@ -309,7 +309,6 @@ int main(int argc, char **argv)
 	const char *mprog_filename = NULL, *mprog_name = NULL;
 	struct xdp_redirect_cpu *skel;
 	struct bpf_map_info info = {};
-	char ifname_buf[IF_NAMESIZE];
 	struct bpf_cpumap_val value;
 	__u32 infosz = sizeof(info);
 	int ret = EXIT_FAIL_OPTION;
@@ -390,10 +389,10 @@ int main(int argc, char **argv)
 		case 'd':
 			if (strlen(optarg) >= IF_NAMESIZE) {
 				fprintf(stderr, "-d/--dev name too long\n");
-				usage(argv, long_options, __doc__, mask, true, skel->obj);
+				goto end_cpu;
 			}
-			safe_strncpy(ifname_buf, optarg, strlen(ifname_buf));
-			ifindex = if_nametoindex(ifname_buf);
+			ifindex = if_nametoindex(optarg);
 			if (!ifindex)
 				ifindex = strtoul(optarg, NULL, 0);
 			if (!ifindex) {
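The dropped safe_strncpy() call measured the wrong string: strlen() ran on the uninitialized destination buffer instead of the source, so the copy length was garbage. The fix removes the copy entirely and passes optarg straight to if_nametoindex() after the explicit IF_NAMESIZE length check. The contrast, for illustration (not from the commit):

char ifname_buf[IF_NAMESIZE];

/* wrong: bounds the copy by the contents of an uninitialized buffer */
safe_strncpy(ifname_buf, optarg, strlen(ifname_buf));

/* right, had a copy still been needed: bound by the destination's capacity */
safe_strncpy(ifname_buf, optarg, sizeof(ifname_buf));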