From 06e71ad534881d2a09ced7509d2ab0daedac4c96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 8 Jul 2024 13:45:38 -0700 Subject: [PATCH 01/23] bpftool: improve skeleton backwards compat with old buggy libbpfs Old versions of libbpf don't handle varying sizes of bpf_map_skeleton struct correctly. As such, BPF skeleton generated by newest bpftool might not be compatible with older libbpf (though only when libbpf is used as a shared library), even though it, by design, should. Going forward libbpf will be fixed, plus we'll release bug fixed versions of relevant old libbpfs, but meanwhile try to mitigate from bpftool side by conservatively assuming older and smaller definition of bpf_map_skeleton, if possible. Meaning, if there are no struct_ops maps. If there are struct_ops, then presumably user would like to have auto-attaching logic and struct_ops map link placeholders, so use the full bpf_map_skeleton definition in that case. Acked-by: Quentin Monnet Co-developed-by: Mykyta Yatsenko Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240708204540.4188946-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/gen.c | 46 ++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 51eaed76db9738..5a4d3240689ed0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -852,24 +852,41 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool { struct bpf_map *map; char ident[256]; - size_t i; + size_t i, map_sz; if (!map_cnt) return; + /* for backward compatibility with old libbpf versions that don't + * handle new BPF skeleton with new struct bpf_map_skeleton definition + * that includes link field, avoid specifying new increased size, + * unless we absolutely have to (i.e., if there are struct_ops maps + * present) + */ + map_sz = offsetof(struct bpf_map_skeleton, link); + if (populate_links) { + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + map_sz = sizeof(struct bpf_map_skeleton); + break; + } + } + } + codegen("\ \n\ - \n\ + \n\ /* maps */ \n\ s->map_cnt = %zu; \n\ - s->map_skel_sz = sizeof(*s->maps); \n\ - s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\ + s->map_skel_sz = %zu; \n\ + s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt,\n\ + sizeof(*s->maps) > %zu ? 
sizeof(*s->maps) : %zu);\n\ if (!s->maps) { \n\ err = -ENOMEM; \n\ goto err; \n\ } \n\ ", - map_cnt + map_cnt, map_sz, map_sz, map_sz ); i = 0; bpf_object__for_each_map(map, obj) { @@ -878,23 +895,22 @@ codegen("\ \n\ - \n\ - s->maps[%zu].name = \"%s\"; \n\ - s->maps[%zu].map = &obj->maps.%s; \n\ + \n\ + map = (struct bpf_map_skeleton *)((char *)s->maps + %zu * s->map_skel_sz);\n\ + map->name = \"%s\"; \n\ + map->map = &obj->maps.%s; \n\ ", - i, bpf_map__name(map), i, ident); + i, bpf_map__name(map), ident); /* memory-mapped internal maps */ if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) { - printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", - i, ident); + printf("\tmap->mmaped = (void **)&obj->%s;\n", ident); } if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { codegen("\ \n\ - s->maps[%zu].link = &obj->links.%s;\n\ - ", - i, ident); + map->link = &obj->links.%s; \n\ + ", ident); } i++; } @@ -1463,6 +1479,7 @@ static int do_skeleton(int argc, char **argv) %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ struct bpf_object_skeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ @@ -1753,6 +1770,7 @@ static int do_subskeleton(int argc, char **argv) { \n\ struct %1$s *obj; \n\ struct bpf_object_subskeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ From 99fb9531886d8ffa0aa9a693089784c7338518a3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 8 Jul 2024 13:45:39 -0700 Subject: [PATCH 02/23] libbpf: fix BPF skeleton forward/backward compat handling BPF skeleton was designed from day one to be extensible. Generated BPF skeleton code specifies actual sizes of map/prog/variable skeletons for that reason and libbpf is supposed to work with newer/older versions correctly. Unfortunately, it was missed that we implicitly embed the hard-coded, most up-to-date (according to the version of libbpf's libbpf.h header used to compile the BPF skeleton header) sizes of those structs, which can differ from the actual sizes at runtime when libbpf is used as a shared library. We have a few places where we just index the array of maps/progs/vars, which implicitly uses these potentially invalid struct sizes. This patch aims to fix this problem going forward. Once this lands, we'll backport these changes in the GitHub repo to create patched releases for older libbpfs.
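Condensed from the diff below, the essence of the fix is to step through the skeleton arrays by the stride recorded in the skeleton itself (map_skel_sz, prog_skel_sz, var_skel_sz) rather than by the struct sizes libbpf happened to be compiled with:

    /* before: stride is sizeof(struct bpf_map_skeleton) as compiled into
     * libbpf, which may not match the layout the skeleton was generated with
     */
    struct bpf_map *map = *s->maps[i].map;

    /* after: stride comes from the skeleton itself */
    struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
    struct bpf_map *map = *map_skel->map;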
Acked-by: Eduard Zingerman Reviewed-by: Alan Maguire Fixes: d66562fba1ce ("libbpf: Add BPF object skeleton support") Fixes: 430025e5dca5 ("libbpf: Add subskeleton scaffolding") Fixes: 08ac454e258e ("libbpf: Auto-attach struct_ops BPF maps in BPF skeleton") Co-developed-by: Mykyta Yatsenko Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240708204540.4188946-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 30f121754d831a..8625c948f60a80 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -13712,14 +13712,15 @@ int libbpf_num_possible_cpus(void) static int populate_skeleton_maps(const struct bpf_object *obj, struct bpf_map_skeleton *maps, - size_t map_cnt) + size_t map_cnt, size_t map_skel_sz) { int i; for (i = 0; i < map_cnt; i++) { - struct bpf_map **map = maps[i].map; - const char *name = maps[i].name; - void **mmaped = maps[i].mmaped; + struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; + struct bpf_map **map = map_skel->map; + const char *name = map_skel->name; + void **mmaped = map_skel->mmaped; *map = bpf_object__find_map_by_name(obj, name); if (!*map) { @@ -13736,13 +13737,14 @@ static int populate_skeleton_maps(const struct bpf_object *obj, static int populate_skeleton_progs(const struct bpf_object *obj, struct bpf_prog_skeleton *progs, - size_t prog_cnt) + size_t prog_cnt, size_t prog_skel_sz) { int i; for (i = 0; i < prog_cnt; i++) { - struct bpf_program **prog = progs[i].prog; - const char *name = progs[i].name; + struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz; + struct bpf_program **prog = prog_skel->prog; + const char *name = prog_skel->name; *prog = bpf_object__find_program_by_name(obj, name); if (!*prog) { @@ -13783,13 +13785,13 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, } *s->obj = obj; - err = populate_skeleton_maps(obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); return libbpf_err(err); } - err = populate_skeleton_progs(obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); return libbpf_err(err); @@ -13819,20 +13821,20 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) return libbpf_err(-errno); } - err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { pr_warn("failed to populate subskeleton maps: %d\n", err); return libbpf_err(err); } - err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { pr_warn("failed to populate subskeleton maps: %d\n", err); return libbpf_err(err); } for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { - var_skel = &s->vars[var_idx]; + var_skel = (void *)s->vars + var_idx * s->var_skel_sz; map = *var_skel->map; map_type_id = bpf_map__btf_value_type_id(map); map_type = btf__type_by_id(btf, map_type_id); @@ -13879,10 +13881,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) } for (i = 0; i < s->map_cnt; i++) { - struct bpf_map *map = 
*s->maps[i].map; + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; size_t mmap_sz = bpf_map_mmap_sz(map); int prot, map_fd = map->fd; - void **mmaped = s->maps[i].mmaped; + void **mmaped = map_skel->mmaped; if (!mmaped) continue; @@ -13930,8 +13933,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) int i, err; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program *prog = *s->progs[i].prog; - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_program *prog = *prog_skel->prog; + struct bpf_link **link = prog_skel->link; if (!prog->autoload || !prog->autoattach) continue; @@ -13970,8 +13974,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) return 0; for (i = 0; i < s->map_cnt; i++) { - struct bpf_map *map = *s->maps[i].map; - struct bpf_link **link = s->maps[i].link; + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; + struct bpf_link **link = map_skel->link; if (!map->autocreate || !map->autoattach) continue; @@ -14000,7 +14005,8 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) int i; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_link **link = prog_skel->link; bpf_link__destroy(*link); *link = NULL; @@ -14010,7 +14016,8 @@ return; for (i = 0; i < s->map_cnt; i++) { - struct bpf_link **link = s->maps[i].link; + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_link **link = map_skel->link; if (link) { bpf_link__destroy(*link); From a459f4bb27f2e2730039c57786b82288742c8c74 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 8 Jul 2024 13:45:40 -0700 Subject: [PATCH 03/23] libbpf: improve old BPF skeleton handling for map auto-attach Improve how we handle old BPF skeletons when it comes to BPF map auto-attachment. Emit one warn-level message for each struct_ops map that could have been auto-attached had the user provided a recent enough BPF skeleton version. Don't spam the log if there are no relevant struct_ops maps, though. This should help users realize that they probably need to regenerate the BPF skeleton header with a more recent bpftool/libbpf-cargo (or whatever other means of BPF skeleton generation).
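The old-skeleton detection relies on the link pointer having been appended to struct bpf_map_skeleton, so a skeleton generated before that addition reports a smaller per-map record size; see the check in the diff below:

    /* the skeleton predates the link field, so there is nowhere to
     * store a struct_ops link; warn instead of silently skipping
     */
    if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) {
        pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n",
                bpf_map__name(map));
        continue;
    }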
Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240708204540.4188946-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8625c948f60a80..a3be6f8fac09ea 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -13967,32 +13967,34 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) */ } - /* Skeleton is created with earlier version of bpftool - * which does not support auto-attachment - */ - if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) - return 0; for (i = 0; i < s->map_cnt; i++) { struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; struct bpf_map *map = *map_skel->map; - struct bpf_link **link = map_skel->link; + struct bpf_link **link; if (!map->autocreate || !map->autoattach) continue; - if (*link) - continue; - /* only struct_ops maps can be attached */ if (!bpf_map__is_struct_ops(map)) continue; - *link = bpf_map__attach_struct_ops(map); + /* skeleton is created with earlier version of bpftool, notify user */ + if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { + pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", + bpf_map__name(map)); + continue; + } + + link = map_skel->link; + if (*link) + continue; + + *link = bpf_map__attach_struct_ops(map); if (!*link) { err = -errno; - pr_warn("map '%s': failed to auto-attach: %d\n", - bpf_map__name(map), err); + pr_warn("map '%s': failed to auto-attach: %d\n", bpf_map__name(map), err); return libbpf_err(err); } } From 605c96997d89c01c11bbddb4db820ede570581c7 Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Tue, 9 Jul 2024 21:09:39 +0000 Subject: [PATCH 04/23] bpf: relax zero fixed offset constraint on KF_TRUSTED_ARGS/KF_RCU Currently, BPF kfuncs which accept trusted pointer arguments i.e. those flagged as KF_TRUSTED_ARGS, KF_RCU, or KF_RELEASE, all require an original/unmodified trusted pointer argument to be supplied to them. By original/unmodified, it means that the backing register holding the trusted pointer argument that is to be supplied to the BPF kfunc must have its fixed offset set to zero, or else the BPF verifier will outright reject the BPF program load. However, this zero fixed offset constraint that is currently enforced by the BPF verifier onto BPF kfuncs specifically flagged to accept KF_TRUSTED_ARGS or KF_RCU trusted pointer arguments is rather unnecessary, and can limit their usability in practice. Specifically, it completely eliminates the possibility of constructing a derived trusted pointer from an original trusted pointer. To put it simply, a derived pointer is a pointer which points to one of the nested member fields of the object being pointed to by the original trusted pointer. This patch relaxes the zero fixed offset constraint that is enforced upon BPF kfuncs which specifically accept KF_TRUSTED_ARGS, or KF_RCU arguments. Although, the zero fixed offset constraint technically also applies to BPF kfuncs accepting KF_RELEASE arguments, relaxing this constraint for such BPF kfuncs has subtle and unwanted side-effects. This was discovered by experimenting a little further with an initial version of this patch series [0]. 
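To make the notion of a derived trusted pointer concrete, the selftest converted below passes a pointer to a nested member of a trusted task_struct pointer straight to a KF_TRUSTED_ARGS kfunc, which the verifier now accepts:

    SEC("tp_btf/task_newtask")
    __success
    int BPF_PROG(test_nested_offset, struct task_struct *task, u64 clone_flags)
    {
        /* &task->cpus_mask is a derived trusted pointer: the trusted
         * base pointer (task) plus a non-zero fixed offset
         */
        bpf_cpumask_first_zero(&task->cpus_mask);
        return 0;
    }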
The primary issue with relaxing the zero fixed offset constraint on BPF kfuncs accepting KF_RELEASE arguments is that it would open up the opportunity for BPF programs to supply both trusted pointers and derived trusted pointers to them. For KF_RELEASE BPF kfuncs specifically, this could be problematic as resources associated with the backing pointer could be released by the backing BPF kfunc and cause instabilities for the rest of the kernel. With this new fixed offset semantic in place for BPF kfuncs accepting KF_TRUSTED_ARGS and KF_RCU arguments, we now have more flexibility when it comes to the BPF kfuncs that we're able to introduce moving forward. Early discussions covering the possibility of relaxing the zero fixed offset constraint can be found using the link below. This will provide more context on where all this has stemmed from [1]. Notably, pre-existing tests have been updated such that they provide coverage for the updated zero fixed offset functionality. Specifically, the nested offset test was converted from a negative to a positive test as it was already designed to assert zero fixed offset semantics of a KF_TRUSTED_ARGS BPF kfunc. [0] https://lore.kernel.org/bpf/ZnA9ndnXKtHOuYMe@google.com/ [1] https://lore.kernel.org/bpf/ZhkbrM55MKQ0KeIV@google.com/ Signed-off-by: Matt Bobrowski Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20240709210939.1544011-1-mattbobrowski@google.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 +++------ tools/testing/selftests/bpf/progs/nested_trust_failure.c | 8 -------- tools/testing/selftests/bpf/progs/nested_trust_success.c | 8 ++++++++ tools/testing/selftests/bpf/verifier/calls.c | 2 +- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3d6306c363b7d1..c0263fb5ca4b99 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11335,7 +11335,9 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) strict_type_match = true; - WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off); + WARN_ON_ONCE(is_kfunc_release(meta) && + (reg->off || !tnum_is_const(reg->var_off) || + reg->var_off.value)); reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id); reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); @@ -11917,12 +11919,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } } - fallthrough; case KF_ARG_PTR_TO_CTX: - /* Trusted arguments have the same offset checks as release arguments */ - arg_type |= OBJ_RELEASE; - break; case KF_ARG_PTR_TO_DYNPTR: case KF_ARG_PTR_TO_ITER: case KF_ARG_PTR_TO_LIST_HEAD: @@ -11935,7 +11933,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: case KF_ARG_PTR_TO_WORKQUEUE: - /* Trusted by default */ break; default: WARN_ON_ONCE(1); diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c index ea39497f11ed7b..3568ec4501002a 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -31,14 +31,6 @@ int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_ return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("R1 must have zero offset when passed to release func or trusted arg
to kfunc") -int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_flags) -{ - bpf_cpumask_first_zero(&task->cpus_mask); - return 0; -} - /* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */ SEC("tp_btf/tcp_probe") __failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_") diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c index 833840bffd3b08..2b66953ca82ea2 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_success.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c @@ -32,3 +32,11 @@ int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb) bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0); return 0; } + +SEC("tp_btf/task_newtask") +__success +int BPF_PROG(test_nested_offset, struct task_struct *task, u64 clone_flags) +{ + bpf_cpumask_first_zero(&task->cpus_mask); + return 0; +} diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index ab25a81fd3a108..d76ef20188592e 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -76,7 +76,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc", + .errstr = "arg#0 expected pointer to ctx, but got PTR", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_pass_ctx", 2 }, }, From c13fda93aca118b8e5cd202e339046728ee7dddb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 10 Jul 2024 16:16:31 +0200 Subject: [PATCH 05/23] bpf: Remove test_run from lwt_seg6local_prog_ops. Syzbot reported that the lwt_seg6-related BPF ops can be invoked via bpf_test_run() without entering input_action_end_bpf() first. Martin KaFai Lau said that the self test for BPF_PROG_TYPE_LWT_SEG6LOCAL probably hasn't worked since it was introduced in commit 004d4b274e2a ("ipv6: sr: Add seg6local action End.BPF"). The reason is that the per-CPU variable seg6_bpf_srh_states::srh is never assigned in the self test case but each BPF function expects it. Remove test_run for BPF_PROG_TYPE_LWT_SEG6LOCAL. Suggested-by: Martin KaFai Lau Reported-by: syzbot+608a2acde8c5a101d07d@syzkaller.appspotmail.com Fixes: d1542d4ae4df ("seg6: Use nested-BH locking for seg6_bpf_srh_states.") Fixes: 004d4b274e2a ("ipv6: sr: Add seg6local action End.BPF") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20240710141631.FbmHcQaX@linutronix.de Signed-off-by: Martin KaFai Lau --- net/core/filter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index d767880c276d9a..4cf1d34f761728 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11053,7 +11053,6 @@ const struct bpf_verifier_ops lwt_seg6local_verifier_ops = { }; const struct bpf_prog_ops lwt_seg6local_prog_ops = { - .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops cg_sock_verifier_ops = { From eeb23b54e447ea62b247d89681f0140abab00d7f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 10 Jul 2024 16:00:51 +0100 Subject: [PATCH 06/23] selftests/bpf: fix compilation failure when CONFIG_NF_FLOW_TABLE=m In many cases, kernel netfilter functionality is built as modules. If CONFIG_NF_FLOW_TABLE=m in particular, progs/xdp_flowtable.c (and hence the selftests) will fail to compile, so add a ___local version of "struct flow_ports".
Fixes: c77e572d3a8c ("selftests/bpf: Add selftest for bpf_xdp_flow_lookup kfunc") Signed-off-by: Alan Maguire Link: https://lore.kernel.org/r/20240710150051.192598-1-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/xdp_flowtable.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/xdp_flowtable.c b/tools/testing/selftests/bpf/progs/xdp_flowtable.c index 15209650f73b8c..7fdc7b23ee7498 100644 --- a/tools/testing/selftests/bpf/progs/xdp_flowtable.c +++ b/tools/testing/selftests/bpf/progs/xdp_flowtable.c @@ -58,6 +58,10 @@ static bool xdp_flowtable_offload_check_tcp_state(void *ports, void *data_end, return true; } +struct flow_ports___local { + __be16 source, dest; +} __attribute__((preserve_access_index)); + SEC("xdp.frags") int xdp_flowtable_do_lookup(struct xdp_md *ctx) { @@ -69,7 +73,7 @@ int xdp_flowtable_do_lookup(struct xdp_md *ctx) }; void *data = (void *)(long)ctx->data; struct ethhdr *eth = data; - struct flow_ports *ports; + struct flow_ports___local *ports; __u32 *val, key = 0; if (eth + 1 > data_end) @@ -79,7 +83,7 @@ int xdp_flowtable_do_lookup(struct xdp_md *ctx) case bpf_htons(ETH_P_IP): { struct iphdr *iph = data + sizeof(*eth); - ports = (struct flow_ports *)(iph + 1); + ports = (struct flow_ports___local *)(iph + 1); if (ports + 1 > data_end) return XDP_PASS; @@ -106,7 +110,7 @@ int xdp_flowtable_do_lookup(struct xdp_md *ctx) struct in6_addr *dst = (struct in6_addr *)tuple.ipv6_dst; struct ipv6hdr *ip6h = data + sizeof(*eth); - ports = (struct flow_ports *)(ip6h + 1); + ports = (struct flow_ports___local *)(ip6h + 1); if (ports + 1 > data_end) return XDP_PASS; From a3016a27cea8e6d10b200b9e19c19961c402d106 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 9 Jul 2024 17:16:17 +0800 Subject: [PATCH 07/23] selftests/bpf: Add backlog for network_helper_opts Some callers expect __start_server() helper to pass their own "backlog" value to listen() instead of the default of 1. So this patch adds struct member "backlog" for network_helper_opts to allow callers to set "backlog" value via start_server_str() helper. listen(fd, 0 /* backlog */) can be used to enforce syncookie. Meaning backlog 0 is a legit value. Using 0 as a default and changing it to 1 here is fine. It makes the test program easier to write for the common case. Enforcing syncookie mode by using backlog 0 is a niche use case but it should at least have a way for the caller to do that. Thus, -ve backlog value is used here for the syncookie use case. Please see the comment in network_helpers.h for the details. Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/1660229659b66eaad07aa2126e9c9fe217eba0dd.1720515893.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/network_helpers.c | 2 +- tools/testing/selftests/bpf/network_helpers.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 44c2c8fa542abe..e0cba4178e41d3 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -106,7 +106,7 @@ static int __start_server(int type, const struct sockaddr *addr, socklen_t addrl } if (type == SOCK_STREAM) { - if (listen(fd, 1) < 0) { + if (listen(fd, opts->backlog ? 
MAX(opts->backlog, 0) : 1) < 0) { log_err("Failed to listed on socket"); goto error_close; } diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 9ea36524b9db47..aac5b94d637991 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -25,6 +25,16 @@ struct network_helper_opts { int timeout_ms; bool must_fail; int proto; + /* +ve: Passed to listen() as-is. + * 0: Default when the test does not set + * a particular value during the struct init. + * It is changed to 1 before passing to listen(). + * Most tests only have one on-going connection. + * -ve: It is changed to 0 before passing to listen(). + * It is useful to force syncookie without + * changing the "tcp_syncookies" sysctl from 1 to 2. + */ + int backlog; int (*post_socket_cb)(int fd, void *opts); void *cb_opts; }; From 7046345d48adcc3f519e7b6192184f6049908bdb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 9 Jul 2024 17:16:18 +0800 Subject: [PATCH 08/23] selftests/bpf: Add ASSERT_OK_FD macro Add a new dedicated ASSERT macro ASSERT_OK_FD to test whether a socket FD is valid or not. It can be used to replace the macros ASSERT_GT(fd, 0, ""), ASSERT_NEQ(fd, -1, "") or statements (fd < 0), (fd != -1). Suggested-by: Martin KaFai Lau Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/ded75be86ac630a3a5099739431854c1ec33f0ea.1720515893.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_progs.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 0ba5a20b19ba8e..51341d50213b9d 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -377,6 +377,15 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_OK_FD(fd, name) ({ \ + static int duration = 0; \ + int ___fd = (fd); \ + bool ___ok = ___fd >= 0; \ + CHECK(!___ok, (name), "unexpected fd: %d (errno %d)\n", \ + ___fd, errno); \ + ___ok; \ +}) + #define SYS(goto_label, fmt, ...) \ ({ \ char cmd[1024]; \ From adae187ebedcd95d02f045bc37dfecfd5b29434b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 9 Jul 2024 17:16:19 +0800 Subject: [PATCH 09/23] selftests/bpf: Close fd in error path in drop_on_reuseport In the error path when update_lookup_map() fails in drop_on_reuseport in prog_tests/sk_lookup.c, "server1", the fd of server 1, should be closed. This patch fixes this by using the "goto close_srv1" label instead of "detach" to close "server1" in this case.
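Condensed, the error path becomes:

    err = update_lookup_map(t->sock_map, SERVER_A, server1);
    if (err)
        goto close_srv1; /* was "goto detach", which skipped close(server1) */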
Fixes: 0ab5539f8584 ("selftests/bpf: Tests for BPF_SK_LOOKUP attach point") Acked-by: Eduard Zingerman Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/86aed33b4b0ea3f04497c757845cff7e8e621a2d.1720515893.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 597d0467a92675..de2466547efe0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -994,7 +994,7 @@ static void drop_on_reuseport(const struct test *t) err = update_lookup_map(t->sock_map, SERVER_A, server1); if (err) - goto detach; + goto close_srv1; /* second server on destination address we should never reach */ server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port, From 14fc6fcd35e7dde6d1de062b6711476b3050b22e Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 9 Jul 2024 17:16:20 +0800 Subject: [PATCH 10/23] selftests/bpf: Use start_server_str in sk_lookup This patch uses public helper start_server_str() to simplify make_server() in prog_tests/sk_lookup.c. Add a callback setsockopts() to do all sockopts, set it to post_socket_cb pointer of struct network_helper_opts. And add a new struct cb_opts to save the data needed to pass to the callback. Then pass this network_helper_opts to start_server_str(). Also use ASSERT_OK_FD() to check fd returned by start_server_str(). Acked-by: Eduard Zingerman Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/5981539f5591d2c4998c962ef2bf45f34c940548.1720515893.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/sk_lookup.c | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index de2466547efe0f..20ee5da2c7216d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -77,6 +77,12 @@ struct test { bool reuseport_has_conns; /* Add a connected socket to reuseport group */ }; +struct cb_opts { + int family; + int sotype; + bool reuseport; +}; + static __u32 duration; /* for CHECK macro */ static bool is_ipv6(const char *ip) @@ -142,19 +148,14 @@ static int make_socket(int sotype, const char *ip, int port, return fd; } -static int make_server(int sotype, const char *ip, int port, - struct bpf_program *reuseport_prog) +static int setsockopts(int fd, void *opts) { - struct sockaddr_storage addr = {0}; + struct cb_opts *co = (struct cb_opts *)opts; const int one = 1; - int err, fd = -1; - - fd = make_socket(sotype, ip, port, &addr); - if (fd < 0) - return -1; + int err = 0; /* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. 
*/ - if (sotype == SOCK_DGRAM) { + if (co->sotype == SOCK_DGRAM) { err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) { @@ -163,7 +164,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) { + if (co->sotype == SOCK_DGRAM && co->family == AF_INET6) { err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) { @@ -172,7 +173,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (sotype == SOCK_STREAM) { + if (co->sotype == SOCK_STREAM) { err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) { @@ -181,7 +182,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (reuseport_prog) { + if (co->reuseport) { err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) { @@ -190,19 +191,28 @@ static int make_server(int sotype, const char *ip, int port, } } - err = bind(fd, (void *)&addr, inetaddr_len(&addr)); - if (CHECK(err, "bind", "failed\n")) { - log_err("failed to bind listen socket"); - goto fail; - } +fail: + return err; +} - if (sotype == SOCK_STREAM) { - err = listen(fd, SOMAXCONN); - if (CHECK(err, "make_server", "listen")) { - log_err("failed to listen on port %d", port); - goto fail; - } - } +static int make_server(int sotype, const char *ip, int port, + struct bpf_program *reuseport_prog) +{ + struct cb_opts cb_opts = { + .family = is_ipv6(ip) ? AF_INET6 : AF_INET, + .sotype = sotype, + .reuseport = reuseport_prog, + }; + struct network_helper_opts opts = { + .backlog = SOMAXCONN, + .post_socket_cb = setsockopts, + .cb_opts = &cb_opts, + }; + int err, fd; + + fd = start_server_str(cb_opts.family, sotype, ip, port, &opts); + if (!ASSERT_OK_FD(fd, "start_server_str")) + return -1; /* Late attach reuseport prog so we can have one init path */ if (reuseport_prog) { From d9810c43f660fd502c5003244a5e9c181aa7df99 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 9 Jul 2024 17:16:21 +0800 Subject: [PATCH 11/23] selftests/bpf: Use start_server_addr in sk_lookup This patch uses public helper start_server_addr() in udp_recv_send() in prog_tests/sk_lookup.c to simplify the code. And use ASSERT_OK_FD() to check fd returned by start_server_addr(). Acked-by: Eduard Zingerman Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/f11cabfef4a2170ecb66a1e8e2e72116d8f621b3.1720515893.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 20ee5da2c7216d..386e482be61709 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -416,18 +416,12 @@ static int udp_recv_send(int server_fd) } /* Reply from original destination address. 
*/ - fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0); - if (CHECK(fd < 0, "socket", "failed\n")) { + fd = start_server_addr(SOCK_DGRAM, dst_addr, sizeof(*dst_addr), NULL); + if (!ASSERT_OK_FD(fd, "start_server_addr")) { log_err("failed to create tx socket"); return -1; } - ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr)); - if (CHECK(ret, "bind", "failed\n")) { - log_err("failed to bind tx socket"); - goto out; - } - msg.msg_control = NULL; msg.msg_controllen = 0; n = sendmsg(fd, &msg, 0); From 9004054b1629d481fedea2d92b880f79fc6fa81b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 9 Jul 2024 17:16:22 +0800 Subject: [PATCH 12/23] selftests/bpf: Use connect_fd_to_fd in sk_lookup This patch uses public helper connect_fd_to_fd() exported in network_helpers.h instead of using getsockname() + connect() in run_lookup_prog() in prog_tests/sk_lookup.c. This can simplify the code. Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/7077c277cde5a1864cdc244727162fb75c8bb9c5.1720515893.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 386e482be61709..ae87c00867ba42 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -633,9 +633,6 @@ static void run_lookup_prog(const struct test *t) * BPF socket lookup. */ if (t->reuseport_has_conns) { - struct sockaddr_storage addr = {}; - socklen_t len = sizeof(addr); - /* Add an extra socket to reuseport group */ reuse_conn_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, @@ -643,12 +640,9 @@ static void run_lookup_prog(const struct test *t) if (reuse_conn_fd < 0) goto close; - /* Connect the extra socket to itself */ - err = getsockname(reuse_conn_fd, (void *)&addr, &len); - if (CHECK(err, "getsockname", "errno %d\n", errno)) - goto close; - err = connect(reuse_conn_fd, (void *)&addr, len); - if (CHECK(err, "connect", "errno %d\n", errno)) + /* Connect the extra socket to itself */ + err = connect_fd_to_fd(reuse_conn_fd, reuse_conn_fd, 0); + if (!ASSERT_OK(err, "connect_fd_to_fd")) goto close; } From eef0532e900c20a6760da829e82dac3ee18688c5 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 10 Jul 2024 21:10:16 +0800 Subject: [PATCH 13/23] selftests/bpf: Null checks for links in bpf_tcp_ca Run bpf_tcp_ca selftests (./test_progs -t bpf_tcp_ca) on a Loongarch platform, some "Segmentation fault" errors occur: ''' test_dctcp:PASS:bpf_dctcp__open_and_load 0 nsec test_dctcp:FAIL:bpf_map__attach_struct_ops unexpected error: -524 #29/1 bpf_tcp_ca/dctcp:FAIL test_cubic:PASS:bpf_cubic__open_and_load 0 nsec test_cubic:FAIL:bpf_map__attach_struct_ops unexpected error: -524 #29/2 bpf_tcp_ca/cubic:FAIL test_dctcp_fallback:PASS:dctcp_skel 0 nsec test_dctcp_fallback:PASS:bpf_dctcp__load 0 nsec test_dctcp_fallback:FAIL:dctcp link unexpected error: -524 #29/4 bpf_tcp_ca/dctcp_fallback:FAIL test_write_sk_pacing:PASS:open_and_load 0 nsec test_write_sk_pacing:FAIL:attach_struct_ops unexpected error: -524 #29/6 bpf_tcp_ca/write_sk_pacing:FAIL test_update_ca:PASS:open 0 nsec test_update_ca:FAIL:attach_struct_ops unexpected error: -524 settcpca:FAIL:setsockopt unexpected setsockopt: \ actual -1 == expected -1 (network_helpers.c:99: errno: No such file or directory) \ Failed to call post_socket_cb 
start_test:FAIL:start_server_str unexpected start_server_str: \ actual -1 == expected -1 test_update_ca:FAIL:ca1_ca1_cnt unexpected ca1_ca1_cnt: \ actual 0 <= expected 0 #29/9 bpf_tcp_ca/update_ca:FAIL #29 bpf_tcp_ca:FAIL Caught signal #11! Stack trace: ./test_progs(crash_handler+0x28)[0x5555567ed91c] linux-vdso.so.1(__vdso_rt_sigreturn+0x0)[0x7ffffee408b0] ./test_progs(bpf_link__update_map+0x80)[0x555556824a78] ./test_progs(+0x94d68)[0x5555564c4d68] ./test_progs(test_bpf_tcp_ca+0xe8)[0x5555564c6a88] ./test_progs(+0x3bde54)[0x5555567ede54] ./test_progs(main+0x61c)[0x5555567efd54] /usr/lib64/libc.so.6(+0x22208)[0x7ffff2aaa208] /usr/lib64/libc.so.6(__libc_start_main+0xac)[0x7ffff2aaa30c] ./test_progs(_start+0x48)[0x55555646bca8] Segmentation fault ''' This is because BPF trampoline is not implemented on Loongarch yet, so the "link" returned by bpf_map__attach_struct_ops() is NULL. test_progs crashes when this NULL link is passed to bpf_link__update_map(). This patch adds NULL checks for all links in bpf_tcp_ca to fix these errors. If "link" is NULL, goto the newly added label "out" to destroy the skel. v2: - use "goto out" instead of "return" as Eduard suggested. Fixes: 06da9f3bd641 ("selftests/bpf: Test switching TCP Congestion Control algorithms.") Signed-off-by: Geliang Tang Reviewed-by: Alan Maguire Link: https://lore.kernel.org/r/b4c841492bd4ed97964e4e61e92827ce51bf1dc9.1720615848.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index bceff590001689..63422f4f389675 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -411,7 +411,8 @@ static void test_update_ca(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; do_test(&opts); saved_ca1_cnt = skel->bss->ca1_cnt; @@ -425,6 +426,7 @@ static void test_update_ca(void) ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } @@ -447,7 +449,8 @@ static void test_update_wrong(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; do_test(&opts); saved_ca1_cnt = skel->bss->ca1_cnt; @@ -460,6 +463,7 @@ static void test_update_wrong(void) ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } @@ -481,7 +485,8 @@ static void test_mixed_links(void) return; link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link); - ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"); + if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl")) + goto out; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); @@ -494,6 +499,7 @@ static void test_mixed_links(void) bpf_link__destroy(link); bpf_link__destroy(link_nl); +out: tcp_ca_update__destroy(skel); } @@ -536,7 +542,8 @@ static void test_link_replace(void) bpf_link__destroy(link); link = bpf_map__attach_struct_ops(skel->maps.ca_update_2); - ASSERT_OK_PTR(link, "attach_struct_ops_2nd"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd")) + goto out; /* BPF_F_REPLACE with a wrong old map Fd. It should fail!
* @@ -559,6 +566,7 @@ static void test_link_replace(void) bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } From 52b49ec1b2c78deb258596c3b231201445ef5380 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 10 Jul 2024 21:10:17 +0800 Subject: [PATCH 14/23] selftests/bpf: Close obj in error path in xdp_adjust_tail If bpf_object__load() fails in test_xdp_adjust_frags_tail_grow(), "obj" opened before this should be closed. So use "goto out" to close it instead of using "return" here. Fixes: 110221081aac ("bpf: selftests: update xdp_adjust_tail selftest to include xdp frags") Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/f282a1ed2d0e3fb38cceefec8e81cabb69cab260.1720615848.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index f09505f8b0386c..53d6ad8c2257eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -222,7 +222,7 @@ static void test_xdp_adjust_frags_tail_grow(void) prog = bpf_object__next_program(obj, NULL); if (bpf_object__load(obj)) - return; + goto out; prog_fd = bpf_program__fd(prog); From 19d3c179a37730caf600a97fed3794feac2b197b Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 11 Jul 2024 15:18:38 +0000 Subject: [PATCH 15/23] bpf, arm64: Fix trampoline for BPF_TRAMP_F_CALL_ORIG When BPF_TRAMP_F_CALL_ORIG is set, the trampoline calls __bpf_tramp_enter() and __bpf_tramp_exit() functions, passing them the struct bpf_tramp_image *im pointer as an argument in R0. The trampoline generation code uses emit_addr_mov_i64() to emit instructions for moving the bpf_tramp_image address into R0, but emit_addr_mov_i64() assumes the address to be in the vmalloc() space and uses only 48 bits. Because bpf_tramp_image is allocated using kzalloc(), its address can use more than 48-bits, in this case the trampoline will pass an invalid address to __bpf_tramp_enter/exit() causing a kernel crash. Fix this by using emit_a64_mov_i64() in place of emit_addr_mov_i64() as it can work with addresses that are greater than 48-bits. 
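Condensed from the diff below, both trampoline call sites switch to the general-purpose 64-bit mover:

    if (flags & BPF_TRAMP_F_CALL_ORIG) {
        /* im comes from kzalloc() and may not fit in the 48 bits that
         * emit_addr_mov_i64() assumes for vmalloc addresses, so
         * materialize the full 64-bit constant instead
         */
        emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
        emit_call((const u64)__bpf_tramp_enter, ctx);
    }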
Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Signed-off-by: Puranjay Mohan Signed-off-by: Daniel Borkmann Closes: https://lore.kernel.org/all/SJ0PR15MB461564D3F7E7A763498CA6A8CBDB2@SJ0PR15MB4615.namprd15.prod.outlook.com/ Link: https://lore.kernel.org/bpf/20240711151838.43469-1-puranjay@kernel.org --- arch/arm64/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 751331f5ba9061..dd0bb069df4bbe 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2147,7 +2147,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); if (flags & BPF_TRAMP_F_CALL_ORIG) { - emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); emit_call((const u64)__bpf_tramp_enter, ctx); } @@ -2191,7 +2191,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_CALL_ORIG) { im->ip_epilogue = ctx->ro_image + ctx->idx; - emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); emit_call((const u64)__bpf_tramp_exit, ctx); } From b3470da314fd8018ee237e382000c4154a942420 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 11 Jul 2024 19:23:21 +0100 Subject: [PATCH 16/23] bpf: annotate BTF show functions with __printf -Werror=suggest-attribute=format warns about two functions in kernel/bpf/btf.c [1]; add __printf() annotations to silence these warnings since for CONFIG_WERROR=y they will trigger build failures. [1] https://lore.kernel.org/bpf/a8b20c72-6631-4404-9e1f-0410642d7d20@gmail.com/ Fixes: 31d0bc81637d ("bpf: Move to generic BTF show support, apply it to seq files/strings") Reported-by: Mirsad Todorovac Signed-off-by: Alan Maguire Tested-by: Mirsad Todorovac Link: https://lore.kernel.org/r/20240711182321.963667-1-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4ff11779699ef3..d5019c4454d64f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7538,8 +7538,8 @@ static void btf_type_show(const struct btf *btf, u32 type_id, void *obj, btf_type_ops(t)->show(btf, t, type_id, obj, 0, show); } -static void btf_seq_show(struct btf_show *show, const char *fmt, - va_list args) +__printf(2, 0) static void btf_seq_show(struct btf_show *show, const char *fmt, + va_list args) { seq_vprintf((struct seq_file *)show->target, fmt, args); } @@ -7572,8 +7572,8 @@ struct btf_show_snprintf { int len; /* length we would have written */ }; -static void btf_snprintf_show(struct btf_show *show, const char *fmt, - va_list args) +__printf(2, 0) static void btf_snprintf_show(struct btf_show *show, const char *fmt, + va_list args) { struct btf_show_snprintf *ssnprintf = (struct btf_show_snprintf *)show; int len; From 2454075f8e2915cebbe52a1195631bc7efe2b7e1 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 12 Jul 2024 10:28:59 +0100 Subject: [PATCH 17/23] bpf: Eliminate remaining "make W=1" warnings in kernel/bpf/btf.o MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As reported by Mirsad [1] we still see format warnings in kernel/bpf/btf.o at W=1 warning level: CC kernel/bpf/btf.o ./kernel/bpf/btf.c: In function ‘btf_type_seq_show_flags’: ./kernel/bpf/btf.c:7553:21: warning: assignment 
left-hand side might be a candidate for a format attribute [-Wsuggest-attribute=format] 7553 | sseq.showfn = btf_seq_show; | ^ ./kernel/bpf/btf.c: In function ‘btf_type_snprintf_show’: ./kernel/bpf/btf.c:7604:31: warning: assignment left-hand side might be a candidate for a format attribute [-Wsuggest-attribute=format] 7604 | ssnprintf.show.showfn = btf_snprintf_show; | ^ Combined with CONFIG_WERROR=y these can halt the build. The fix (annotating the structure field with __printf()) suggested by Mirsad resolves these. Apologies I missed this last time. No other W=1 warnings were observed in kernel/bpf after this fix. [1] https://lore.kernel.org/bpf/92c9d047-f058-400c-9c7d-81d4dc1ef71b@gmail.com/ Fixes: b3470da314fd ("bpf: annotate BTF show functions with __printf") Reported-by: Mirsad Todorovac Suggested-by: Mirsad Todorovac Signed-off-by: Alan Maguire Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240712092859.1390960-1-alan.maguire@oracle.com --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index d5019c4454d64f..520f49f422fee2 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -415,7 +415,7 @@ const char *btf_type_str(const struct btf_type *t) struct btf_show { u64 flags; void *target; /* target of show operation (seq file, buffer) */ - void (*showfn)(struct btf_show *show, const char *fmt, va_list args); + __printf(2, 0) void (*showfn)(struct btf_show *show, const char *fmt, va_list args); const struct btf *btf; /* below are used during iteration */ struct { From 4a04b4f0de59dd5c621e78f15803ee0b0544eeb8 Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Fri, 12 Jul 2024 16:01:24 +0800 Subject: [PATCH 18/23] bpf: fix overflow check in adjust_jmp_off() adjust_jmp_off() used the insn->imm field for all overflow checks, which is incorrect: that should only be done for the BPF_JMP32 | BPF_JA case, not the general jump instruction case. Fix it by using insn->off for the overflow check in the general case. Fixes: 5337ac4c9b80 ("bpf: Fix the corner case with may_goto and jump to the 1st insn.") Signed-off-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20240712080127.136608-2-shung-hsi.yu@suse.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c0263fb5ca4b99..cf2eb07ddf28cd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18852,7 +18852,7 @@ static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta) } else { if (i + 1 + insn->off != tgt_idx) continue; - if (signed_add16_overflows(insn->imm, delta)) + if (signed_add16_overflows(insn->off, delta)) return -ERANGE; insn->off += delta; } From 28a4411076b254c67842348e3b25c2fb41a94cad Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Fri, 12 Jul 2024 16:01:25 +0800 Subject: [PATCH 19/23] bpf: use check_add_overflow() to check for addition overflows signed_add*_overflows() was added back when there was no overflow-check helper. With the introduction of such helpers in commit f0907827a8a91 ("compiler.h: enable builtin overflow checkers and add fallback code"), we can drop signed_add*_overflows() in kernel/bpf/verifier.c and use the generic check_add_overflow() instead. This will make future refactoring easier, and takes advantage of compiler-emitted hardware instructions that efficiently implement these checks.
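In C terms, the conversion looks like this for the 64-bit signed bounds (taken from scalar_min_max_add() in the diff below):

    /* before: predict overflow with a hand-rolled helper, then do the
     * addition separately
     */
    if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
        signed_add_overflows(dst_reg->smax_value, smax_val)) {
        dst_reg->smin_value = S64_MIN;
        dst_reg->smax_value = S64_MAX;
    } else {
        dst_reg->smin_value += smin_val;
        dst_reg->smax_value += smax_val;
    }

    /* after: the builtin adds and reports overflow in one step, storing
     * the (wrapped) sum through the result pointer
     */
    if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
        check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
        *dst_smin = S64_MIN;
        *dst_smax = S64_MAX;
    }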
After the change GCC 13.3.0 generates cleaner assembly on x86_64: err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); 13625: mov 0x28(%rbx),%r9 /* r9 = src_reg->smin_value */ 13629: mov 0x30(%rbx),%rcx /* rcx = src_reg->smax_value */ ... if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) || 141c1: mov %r9,%rax 141c4: add 0x28(%r12),%rax 141c9: mov %rax,0x28(%r12) 141ce: jo 146e4 check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) { 141d4: add 0x30(%r12),%rcx 141d9: mov %rcx,0x30(%r12) if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) || 141de: jo 146e4 ... *dst_smin = S64_MIN; 146e4: movabs $0x8000000000000000,%rax 146ee: mov %rax,0x28(%r12) *dst_smax = S64_MAX; 146f3: sub $0x1,%rax 146f7: mov %rax,0x30(%r12) Before the change it gives: s64 smin_val = src_reg->smin_value; 675: mov 0x28(%rsi),%r8 s64 smax_val = src_reg->smax_value; u64 umin_val = src_reg->umin_value; u64 umax_val = src_reg->umax_value; 679: mov %rdi,%rax /* rax = dst_reg */ if (signed_add_overflows(dst_reg->smin_value, smin_val) || 67c: mov 0x28(%rdi),%rdi /* rdi = dst_reg->smin_value */ u64 umin_val = src_reg->umin_value; 680: mov 0x38(%rsi),%rdx u64 umax_val = src_reg->umax_value; 684: mov 0x40(%rsi),%rcx s64 res = (s64)((u64)a + (u64)b); 688: lea (%r8,%rdi,1),%r9 /* r9 = dst_reg->smin_value + src_reg->smin_value */ return res < a; 68c: cmp %r9,%rdi 68f: setg %r10b /* r10b = (dst_reg->smin_value + src_reg->smin_value) > dst_reg->smin_value */ if (b < 0) 693: test %r8,%r8 696: js 72b signed_add_overflows(dst_reg->smax_value, smax_val)) { dst_reg->smin_value = S64_MIN; dst_reg->smax_value = S64_MAX; 69c: movabs $0x7fffffffffffffff,%rdi s64 smax_val = src_reg->smax_value; 6a6: mov 0x30(%rsi),%r8 dst_reg->smin_value = S64_MIN; 6aa: 00 00 00 movabs $0x8000000000000000,%rsi if (signed_add_overflows(dst_reg->smin_value, smin_val) || 6b4: test %r10b,%r10b /* (dst_reg->smin_value + src_reg->smin_value) > dst_reg->smin_value ? goto 6cb */ 6b7: jne 6cb signed_add_overflows(dst_reg->smax_value, smax_val)) { 6b9: mov 0x30(%rax),%r10 /* r10 = dst_reg->smax_value */ s64 res = (s64)((u64)a + (u64)b); 6bd: lea (%r10,%r8,1),%r11 /* r11 = dst_reg->smax_value + src_reg->smax_value */ if (b < 0) 6c1: test %r8,%r8 6c4: js 71e if (signed_add_overflows(dst_reg->smin_value, smin_val) || 6c6: cmp %r11,%r10 /* (dst_reg->smax_value + src_reg->smax_value) <= dst_reg->smax_value ? goto 723 */ 6c9: jle 723 } else { dst_reg->smin_value += smin_val; dst_reg->smax_value += smax_val; } 6cb: mov %rsi,0x28(%rax) ... 6d5: mov %rdi,0x30(%rax) ... if (signed_add_overflows(dst_reg->smin_value, smin_val) || 71e: cmp %r11,%r10 721: jl 6cb dst_reg->smin_value += smin_val; 723: mov %r9,%rsi dst_reg->smax_value += smax_val; 726: mov %r11,%rdi 729: jmp 6cb return res > a; 72b: cmp %r9,%rdi 72e: setl %r10b 732: jmp 69c 737: nopw 0x0(%rax,%rax,1) Note: unlike adjust_ptr_min_max_vals() and scalar*_min_max_add(), it is necessary to introduce intermediate variable in adjust_jmp_off() to keep the functional behavior unchanged. Without an intermediate variable imm/off will be altered even on overflow. 
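Concretely, in adjust_jmp_off() the sum has to land in a scratch variable first:

    s32 imm;

    if (check_add_overflow(insn->imm, delta, &imm))
        return -ERANGE; /* check_add_overflow() stores the wrapped sum
                         * through its result pointer even on overflow,
                         * so adding directly into insn->imm would
                         * corrupt the instruction
                         */
    insn->imm = imm;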
Suggested-by: Jiri Olsa
Signed-off-by: Shung-Hsi Yu
Link: https://lore.kernel.org/r/20240712080127.136608-3-shung-hsi.yu@suse.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 114 +++++++++++++-----------------------------
 1 file changed, 34 insertions(+), 80 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cf2eb07ddf28cd..0126c9c80c58e9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12726,36 +12726,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return 0;
 }
 
-static bool signed_add_overflows(s64 a, s64 b)
-{
-	/* Do the add in u64, where overflow is well-defined */
-	s64 res = (s64)((u64)a + (u64)b);
-
-	if (b < 0)
-		return res > a;
-	return res < a;
-}
-
-static bool signed_add32_overflows(s32 a, s32 b)
-{
-	/* Do the add in u32, where overflow is well-defined */
-	s32 res = (s32)((u32)a + (u32)b);
-
-	if (b < 0)
-		return res > a;
-	return res < a;
-}
-
-static bool signed_add16_overflows(s16 a, s16 b)
-{
-	/* Do the add in u16, where overflow is well-defined */
-	s16 res = (s16)((u16)a + (u16)b);
-
-	if (b < 0)
-		return res > a;
-	return res < a;
-}
-
 static bool signed_sub_overflows(s64 a, s64 b)
 {
 	/* Do the sub in u64, where overflow is well-defined */
@@ -13257,21 +13227,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	 * added into the variable offset, and we copy the fixed offset
 	 * from ptr_reg.
 	 */
-	if (signed_add_overflows(smin_ptr, smin_val) ||
-	    signed_add_overflows(smax_ptr, smax_val)) {
+	if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) ||
+	    check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) {
 		dst_reg->smin_value = S64_MIN;
 		dst_reg->smax_value = S64_MAX;
-	} else {
-		dst_reg->smin_value = smin_ptr + smin_val;
-		dst_reg->smax_value = smax_ptr + smax_val;
 	}
-	if (umin_ptr + umin_val < umin_ptr ||
-	    umax_ptr + umax_val < umax_ptr) {
+	if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) ||
+	    check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) {
 		dst_reg->umin_value = 0;
 		dst_reg->umax_value = U64_MAX;
-	} else {
-		dst_reg->umin_value = umin_ptr + umin_val;
-		dst_reg->umax_value = umax_ptr + umax_val;
 	}
 	dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
 	dst_reg->off = ptr_reg->off;
@@ -13374,52 +13338,40 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
 				 struct bpf_reg_state *src_reg)
 {
-	s32 smin_val = src_reg->s32_min_value;
-	s32 smax_val = src_reg->s32_max_value;
-	u32 umin_val = src_reg->u32_min_value;
-	u32 umax_val = src_reg->u32_max_value;
+	s32 *dst_smin = &dst_reg->s32_min_value;
+	s32 *dst_smax = &dst_reg->s32_max_value;
+	u32 *dst_umin = &dst_reg->u32_min_value;
+	u32 *dst_umax = &dst_reg->u32_max_value;
 
-	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
-	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
-		dst_reg->s32_min_value = S32_MIN;
-		dst_reg->s32_max_value = S32_MAX;
-	} else {
-		dst_reg->s32_min_value += smin_val;
-		dst_reg->s32_max_value += smax_val;
+	if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
+	    check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
+		*dst_smin = S32_MIN;
+		*dst_smax = S32_MAX;
 	}
-	if (dst_reg->u32_min_value + umin_val < umin_val ||
-	    dst_reg->u32_max_value + umax_val < umax_val) {
-		dst_reg->u32_min_value = 0;
-		dst_reg->u32_max_value = U32_MAX;
-	} else {
-		dst_reg->u32_min_value += umin_val;
-		dst_reg->u32_max_value += umax_val;
+	if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) ||
+	    check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) {
+		*dst_umin = 0;
+		*dst_umax = U32_MAX;
 	}
 }
 
 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
 			       struct bpf_reg_state *src_reg)
 {
-	s64 smin_val = src_reg->smin_value;
-	s64 smax_val = src_reg->smax_value;
-	u64 umin_val = src_reg->umin_value;
-	u64 umax_val = src_reg->umax_value;
+	s64 *dst_smin = &dst_reg->smin_value;
+	s64 *dst_smax = &dst_reg->smax_value;
+	u64 *dst_umin = &dst_reg->umin_value;
+	u64 *dst_umax = &dst_reg->umax_value;
 
-	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
-	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
-		dst_reg->smin_value = S64_MIN;
-		dst_reg->smax_value = S64_MAX;
-	} else {
-		dst_reg->smin_value += smin_val;
-		dst_reg->smax_value += smax_val;
+	if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
+	    check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
+		*dst_smin = S64_MIN;
+		*dst_smax = S64_MAX;
 	}
-	if (dst_reg->umin_value + umin_val < umin_val ||
-	    dst_reg->umax_value + umax_val < umax_val) {
-		dst_reg->umin_value = 0;
-		dst_reg->umax_value = U64_MAX;
-	} else {
-		dst_reg->umin_value += umin_val;
-		dst_reg->umax_value += umax_val;
+	if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) ||
+	    check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) {
+		*dst_umin = 0;
+		*dst_umax = U64_MAX;
 	}
 }
 
@@ -18835,6 +18787,8 @@ static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
 {
 	struct bpf_insn *insn = prog->insnsi;
 	u32 insn_cnt = prog->len, i;
+	s32 imm;
+	s16 off;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		u8 code = insn->code;
@@ -18846,15 +18800,15 @@ static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
 		if (insn->code == (BPF_JMP32 | BPF_JA)) {
 			if (i + 1 + insn->imm != tgt_idx)
 				continue;
-			if (signed_add32_overflows(insn->imm, delta))
+			if (check_add_overflow(insn->imm, delta, &imm))
 				return -ERANGE;
-			insn->imm += delta;
+			insn->imm = imm;
 		} else {
 			if (i + 1 + insn->off != tgt_idx)
 				continue;
-			if (signed_add16_overflows(insn->off, delta))
+			if (check_add_overflow(insn->off, delta, &off))
 				return -ERANGE;
-			insn->off += delta;
+			insn->off = off;
 		}
 	}
 	return 0;

From deac5871eb0751454cb80b3ff6b69e42a6c1bab2 Mon Sep 17 00:00:00 2001
From: Shung-Hsi Yu
Date: Fri, 12 Jul 2024 16:01:26 +0800
Subject: [PATCH 20/23] bpf: use check_sub_overflow() to check for subtraction
 overflows

Similar to the previous patch, which drops signed_add*_overflows() and
uses the compiler-builtin-based check_add_overflow(), do the same for
signed_sub*_overflows() and replace them with the generic
check_sub_overflow() to make future refactoring easier and to have the
checks implemented more efficiently.

The unsigned overflow checks for subtraction do not use helpers and are
simple enough already, so they're left untouched.

After the change GCC 13.3.0 generates cleaner assembly on x86_64:

	if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
   139bf:	mov    0x28(%r12),%rax
   139c4:	mov    %edx,0x54(%r12)
   139c9:	sub    %r11,%rax
   139cc:	mov    %rax,0x28(%r12)
   139d1:	jo     14627
	    check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
   139d7:	mov    0x30(%r12),%rax
   139dc:	sub    %r9,%rax
   139df:	mov    %rax,0x30(%r12)
	if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
   139e4:	jo     14627
	...
		*dst_smin = S64_MIN;
   14627:	movabs $0x8000000000000000,%rax
   14631:	mov    %rax,0x28(%r12)
		*dst_smax = S64_MAX;
   14636:	sub    $0x1,%rax
   1463a:	mov    %rax,0x30(%r12)

Before the change it gives:

	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
   13a50:	mov    0x28(%r12),%rdi
   13a55:	mov    %edx,0x54(%r12)
		dst_reg->smax_value = S64_MAX;
   13a5a:	movabs $0x7fffffffffffffff,%rdx
   13a64:	mov    %eax,0x50(%r12)
		dst_reg->smin_value = S64_MIN;
   13a69:	movabs $0x8000000000000000,%rax
	s64 res = (s64)((u64)a - (u64)b);
   13a73:	mov    %rdi,%rsi
   13a76:	sub    %rcx,%rsi
	if (b < 0)
   13a79:	test   %rcx,%rcx
   13a7c:	js     145ea
	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
   13a82:	cmp    %rsi,%rdi
   13a85:	jl     13ac7
	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
   13a87:	mov    0x30(%r12),%r8
	s64 res = (s64)((u64)a - (u64)b);
   13a8c:	mov    %r8,%rax
   13a8f:	sub    %r9,%rax
	return res > a;
   13a92:	cmp    %rax,%r8
   13a95:	setl   %sil
	if (b < 0)
   13a99:	test   %r9,%r9
   13a9c:	js     147d1
		dst_reg->smax_value = S64_MAX;
   13aa2:	movabs $0x7fffffffffffffff,%rdx
		dst_reg->smin_value = S64_MIN;
   13aac:	movabs $0x8000000000000000,%rax
	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
   13ab6:	test   %sil,%sil
   13ab9:	jne    13ac7
		dst_reg->smin_value -= smax_val;
   13abb:	mov    %rdi,%rax
		dst_reg->smax_value -= smin_val;
   13abe:	mov    %r8,%rdx
		dst_reg->smin_value -= smax_val;
   13ac1:	sub    %rcx,%rax
		dst_reg->smax_value -= smin_val;
   13ac4:	sub    %r9,%rdx
   13ac7:	mov    %rax,0x28(%r12)
	...
   13ad1:	mov    %rdx,0x30(%r12)
	...
	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
   145ea:	cmp    %rsi,%rdi
   145ed:	jg     13ac7
   145f3:	jmp    13a87

Suggested-by: Jiri Olsa
Signed-off-by: Shung-Hsi Yu
Acked-by: Jiri Olsa
Link: https://lore.kernel.org/r/20240712080127.136608-4-shung-hsi.yu@suse.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 57 +++++++++++--------------------------------
 1 file changed, 14 insertions(+), 43 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0126c9c80c58e9..8da132a1ef2818 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12726,26 +12726,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return 0;
 }
 
-static bool signed_sub_overflows(s64 a, s64 b)
-{
-	/* Do the sub in u64, where overflow is well-defined */
-	s64 res = (s64)((u64)a - (u64)b);
-
-	if (b < 0)
-		return res < a;
-	return res > a;
-}
-
-static bool signed_sub32_overflows(s32 a, s32 b)
-{
-	/* Do the sub in u32, where overflow is well-defined */
-	s32 res = (s32)((u32)a - (u32)b);
-
-	if (b < 0)
-		return res < a;
-	return res > a;
-}
-
 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
 				  const struct bpf_reg_state *reg,
 				  enum bpf_reg_type type)
@@ -13278,14 +13258,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		/* A new variable offset is created.  If the subtrahend is known
 		 * nonnegative, then any reg->range we had before is still good.
 		 */
-		if (signed_sub_overflows(smin_ptr, smax_val) ||
-		    signed_sub_overflows(smax_ptr, smin_val)) {
+		if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) ||
+		    check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) {
 			/* Overflow possible, we know nothing */
 			dst_reg->smin_value = S64_MIN;
 			dst_reg->smax_value = S64_MAX;
-		} else {
-			dst_reg->smin_value = smin_ptr - smax_val;
-			dst_reg->smax_value = smax_ptr - smin_val;
 		}
 		if (umin_ptr < umax_val) {
 			/* Overflow possible, we know nothing */
@@ -13378,19 +13355,16 @@ static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
 				 struct bpf_reg_state *src_reg)
 {
-	s32 smin_val = src_reg->s32_min_value;
-	s32 smax_val = src_reg->s32_max_value;
+	s32 *dst_smin = &dst_reg->s32_min_value;
+	s32 *dst_smax = &dst_reg->s32_max_value;
 	u32 umin_val = src_reg->u32_min_value;
 	u32 umax_val = src_reg->u32_max_value;
 
-	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
-	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
+	if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
+	    check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
 		/* Overflow possible, we know nothing */
-		dst_reg->s32_min_value = S32_MIN;
-		dst_reg->s32_max_value = S32_MAX;
-	} else {
-		dst_reg->s32_min_value -= smax_val;
-		dst_reg->s32_max_value -= smin_val;
+		*dst_smin = S32_MIN;
+		*dst_smax = S32_MAX;
 	}
 	if (dst_reg->u32_min_value < umax_val) {
 		/* Overflow possible, we know nothing */
@@ -13406,19 +13380,16 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
 			       struct bpf_reg_state *src_reg)
 {
-	s64 smin_val = src_reg->smin_value;
-	s64 smax_val = src_reg->smax_value;
+	s64 *dst_smin = &dst_reg->smin_value;
+	s64 *dst_smax = &dst_reg->smax_value;
 	u64 umin_val = src_reg->umin_value;
 	u64 umax_val = src_reg->umax_value;
 
-	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
-	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
+	if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
+	    check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
 		/* Overflow possible, we know nothing */
-		dst_reg->smin_value = S64_MIN;
-		dst_reg->smax_value = S64_MAX;
-	} else {
-		dst_reg->smin_value -= smax_val;
-		dst_reg->smax_value -= smin_val;
+		*dst_smin = S64_MIN;
+		*dst_smax = S64_MAX;
 	}
 	if (dst_reg->umin_value < umax_val) {
 		/* Overflow possible, we know nothing */

From 517125f6749402e579f715519147145944f12ad9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 12 Jul 2024 18:12:30 +0200
Subject: [PATCH 21/23] selftests/bpf: DENYLIST.aarch64: Skip fexit_sleep again

Revert commit 90dc946059b7 ("selftests/bpf: DENYLIST.aarch64: Remove
fexit_sleep") again. The fix in 19d3c179a377 ("bpf, arm64: Fix
trampoline for BPF_TRAMP_F_CALL_ORIG") does not address all of the
issues and BPF CI is still hanging and timing out:

  https://github.com/kernel-patches/bpf/actions/runs/9905842936/job/27366435436

  [...]
  #89/11   fexit_bpf2bpf/func_replace_global_func:OK
  #89/12   fexit_bpf2bpf/fentry_to_cgroup_bpf:OK
  #89/13   fexit_bpf2bpf/func_replace_progmap:OK
  #89      fexit_bpf2bpf:OK
  Error: The operation was canceled.

Thus, more investigation work and fixing is needed before the test can
be put back in place.
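For context, fexit_sleep lets a task sleep inside the nanosleep syscall
while an fentry/fexit pair is attached and then detaches the programs,
which exercises trampoline teardown around the original call (the
BPF_TRAMP_F_CALL_ORIG path named in the referenced fix). A rough sketch
of the shape of the test's BPF side follows; this is an approximation
for illustration, not the actual progs/fexit_sleep.c source, and the
traced symbol is arch-specific (assumed here to be __arm64_sys_nanosleep
on the affected platform):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	char LICENSE[] SEC("license") = "GPL";

	int pid = 0;		/* set by the userspace side */
	int fentry_cnt = 0;
	int fexit_cnt = 0;

	SEC("fentry/__arm64_sys_nanosleep")
	int nanosleep_fentry(void *ctx)
	{
		if (bpf_get_current_pid_tgid() >> 32 != pid)
			return 0;
		fentry_cnt++;
		return 0;
	}

	/* The fexit program only runs once the task returns from its
	 * sleep, so the task sits inside the trampoline's original-call
	 * window for the whole sleep duration.
	 */
	SEC("fexit/__arm64_sys_nanosleep")
	int nanosleep_fexit(void *ctx)
	{
		if (bpf_get_current_pid_tgid() >> 32 != pid)
			return 0;
		fexit_cnt++;
		return 0;
	}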
Signed-off-by: Daniel Borkmann
Cc: Puranjay Mohan
Link: https://lore.kernel.org/bpf/20240705145009.32340-1-puranjay@kernel.org
---
 tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 901349da680fa6..3c7c3e79aa9317 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,5 +1,6 @@
 bpf_cookie/multi_kprobe_attach_api     # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
 bpf_cookie/multi_kprobe_link_api       # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+fexit_sleep                            # The test never returns. The remaining tests cannot start.
 kprobe_multi_bench_attach              # needs CONFIG_FPROBE
 kprobe_multi_test                      # needs CONFIG_FPROBE
 module_attach                          # prog 'kprobe_multi': failed to auto-attach: -95

From f7866c35873377313ff94398f17d425b28b71de1 Mon Sep 17 00:00:00 2001
From: Tengda Wu
Date: Thu, 11 Jul 2024 22:58:18 +0800
Subject: [PATCH 22/23] bpf: Fix null pointer dereference in
 resolve_prog_type() for BPF_PROG_TYPE_EXT

When loading an EXT program without specifying `attr->attach_prog_fd`,
`prog->aux->dst_prog` will be NULL. From then on, calling
resolve_prog_type() anywhere will result in a null pointer dereference.

Example stack trace:

[    8.107863] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004
[    8.108262] Mem abort info:
[    8.108384]   ESR = 0x0000000096000004
[    8.108547]   EC = 0x25: DABT (current EL), IL = 32 bits
[    8.108722]   SET = 0, FnV = 0
[    8.108827]   EA = 0, S1PTW = 0
[    8.108939]   FSC = 0x04: level 0 translation fault
[    8.109102] Data abort info:
[    8.109203]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
[    8.109399]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[    8.109614]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[    8.109836] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101354000
[    8.110011] [0000000000000004] pgd=0000000000000000, p4d=0000000000000000
[    8.112624] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
[    8.112783] Modules linked in:
[    8.113120] CPU: 0 PID: 99 Comm: may_access_dire Not tainted 6.10.0-rc3-next-20240613-dirty #1
[    8.113230] Hardware name: linux,dummy-virt (DT)
[    8.113390] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    8.113429] pc : may_access_direct_pkt_data+0x24/0xa0
[    8.113746] lr : add_subprog_and_kfunc+0x634/0x8e8
[    8.113798] sp : ffff80008283b9f0
[    8.113813] x29: ffff80008283b9f0 x28: ffff800082795048 x27: 0000000000000001
[    8.113881] x26: ffff0000c0bb2600 x25: 0000000000000000 x24: 0000000000000000
[    8.113897] x23: ffff0000c1134000 x22: 000000000001864f x21: ffff0000c1138000
[    8.113912] x20: 0000000000000001 x19: ffff0000c12b8000 x18: ffffffffffffffff
[    8.113929] x17: 0000000000000000 x16: 0000000000000000 x15: 0720072007200720
[    8.113944] x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720
[    8.113958] x11: 0720072007200720 x10: 0000000000f9fca4 x9 : ffff80008021f4e4
[    8.113991] x8 : 0101010101010101 x7 : 746f72705f6d656d x6 : 000000001e0e0f5f
[    8.114006] x5 : 000000000001864f x4 : ffff0000c12b8000 x3 : 000000000000001c
[    8.114020] x2 : 0000000000000002 x1 : 0000000000000000 x0 : 0000000000000000
[    8.114126] Call trace:
[    8.114159]  may_access_direct_pkt_data+0x24/0xa0
[    8.114202]  bpf_check+0x3bc/0x28c0
[    8.114214]  bpf_prog_load+0x658/0xa58
[    8.114227]  __sys_bpf+0xc50/0x2250
[    8.114240]  __arm64_sys_bpf+0x28/0x40
[    8.114254]  invoke_syscall.constprop.0+0x54/0xf0
[    8.114273]  do_el0_svc+0x4c/0xd8
[    8.114289]  el0_svc+0x3c/0x140
[    8.114305]  el0t_64_sync_handler+0x134/0x150
[    8.114331]  el0t_64_sync+0x168/0x170
[    8.114477] Code: 7100707f 54000081 f9401c00 f9403800 (b9400403)
[    8.118672] ---[ end trace 0000000000000000 ]---

One way to fix it is to force `attach_prog_fd` to be non-empty at
bpf_prog_load() time. But that would break the
`libbpf_probe_bpf_prog_type` API, which uses the verifier log to probe
the prog type and would log nothing if we rejected an invalid EXT prog
before bpf_check(). Another way is to add a NULL check in
resolve_prog_type().

The issue was introduced by commit 4a9c7bbe2ed4 ("bpf: Resolve to
prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT"), which wanted to
correct type resolution for BPF_PROG_TYPE_TRACING programs. Before
that, the type resolution of a BPF_PROG_TYPE_EXT prog followed the
logic below:

  prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;

It implies that when an EXT program is not yet attached to `dst_prog`,
the prog type should be EXT itself. This code worked fine in the past,
so just keep using it.

Fix this by returning `prog->type` for BPF_PROG_TYPE_EXT if `dst_prog`
is not present in resolve_prog_type().

Fixes: 4a9c7bbe2ed4 ("bpf: Resolve to prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT")
Signed-off-by: Tengda Wu
Signed-off-by: Daniel Borkmann
Acked-by: Daniel Borkmann
Cc: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20240711145819.254178-2-wutengda@huaweicloud.com
---
 include/linux/bpf_verifier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e98ba5a5cf7976..6503c85b10a30a 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -856,7 +856,7 @@ static inline u32 type_flag(u32 type)
 /* only use after check_attach_btf_id() */
 static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog)
 {
-	return prog->type == BPF_PROG_TYPE_EXT ?
+	return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ?
 		prog->aux->dst_prog->type : prog->type;
 }

From e435b043d89a267bd6eb3d5650d2319805d7924a Mon Sep 17 00:00:00 2001
From: Tengda Wu
Date: Thu, 11 Jul 2024 22:58:19 +0800
Subject: [PATCH 23/23] selftests/bpf: Test for null-pointer-deref bugfix in
 resolve_prog_type()

This test verifies that resolve_prog_type() works as expected when
`attach_prog_fd` is not passed in. `prog->aux->dst_prog` in
resolve_prog_type() is assigned from `attach_prog_fd` and is NULL if
`attach_prog_fd` is not provided. Loading an EXT prog with a
bpf_dynptr_from_skb() kfunc call in this way used to lead to a
null-pointer-deref. Verify that the null-pointer-deref bug in
resolve_prog_type() is fixed.
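The same rejection can also be reproduced from userspace with libbpf's
low-level loader. The following is a minimal sketch under stated
assumptions (libbpf >= 0.7 for bpf_prog_load()/LIBBPF_OPTS; a trivial
"r0 = 0; exit" body instead of the selftest's kfunc call, which is
enough to reach the attach-target check); it is not part of the
selftest itself:

	#include <linux/bpf.h>
	#include <bpf/bpf.h>
	#include <stdio.h>

	int main(void)
	{
		/* r0 = 0; exit */
		const struct bpf_insn insns[] = {
			{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
			  .dst_reg = BPF_REG_0, .imm = 0 },
			{ .code = BPF_JMP | BPF_EXIT },
		};
		char log[4096];
		LIBBPF_OPTS(bpf_prog_load_opts, opts,
			.log_buf = log,
			.log_size = sizeof(log),
			.log_level = 1,
		);
		int fd;

		/* attach_prog_fd/attach_btf_id are deliberately left
		 * unset, so prog->aux->dst_prog stays NULL in the kernel,
		 * which is the condition that used to crash
		 * resolve_prog_type().
		 */
		fd = bpf_prog_load(BPF_PROG_TYPE_EXT, "ext_no_target",
				   "GPL", insns,
				   sizeof(insns) / sizeof(insns[0]), &opts);
		if (fd < 0)
			printf("load rejected as expected: %s\n", log);
		return 0;
	}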
Signed-off-by: Tengda Wu
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20240711145819.254178-3-wutengda@huaweicloud.com
---
 tools/testing/selftests/bpf/verifier/calls.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index d76ef20188592e..d0cdd156cd5558 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -275,6 +275,19 @@
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
+{
+	"calls: invalid kfunc call: must provide (attach_prog_fd, btf_id) pair when freplace",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_EXT,
+	.result = REJECT,
+	.errstr = "Tracing programs must provide btf_id",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_dynptr_from_skb", 0 },
+	},
+},
 {
 	"calls: basic sanity",
 	.insns = {