Skip to content

Commit

Permalink
bpf: Add support for BTF pointers to x86 JIT
Browse files Browse the repository at this point in the history
Pointer to BTF object is a pointer to kernel object or NULL.
Such pointers can only be used by BPF_LDX instructions.
The verifier changed their opcode from LDX|MEM|size
to LDX|PROBE_MEM|size to make JITing easier.
The number of entries in extable is the number of BPF_LDX insns
that access kernel memory via "pointer to BTF type".
Only these load instructions can fault.
Since x86 extable is relative it has to be allocated in the same
memory region as JITed code.
Allocate it prior to last pass of JITing and let the last pass populate it.
Pointer to extable in bpf_prog_aux is necessary to make page fault
handling fast.
Page fault handling is done in two steps:
1. bpf_prog_kallsyms_find() finds BPF program that page faulted.
   It's done by walking rb tree.
2. then extable for given bpf program is binary searched.
This process is similar to how page faulting is done for kernel modules.
The exception handler skips over faulting x86 instruction and
initializes destination register with zero. This mimics exact
behavior of bpf_probe_read (when probe_kernel_read faults dest is zeroed).

JITs for other architectures can add support in similar way.
Until then they will reject unknown opcode and fallback to interpreter.

Since extable should be aligned and placed near JITed code
make bpf_jit_binary_alloc() return 4 byte aligned image offset,
so that extable aligning formula in bpf_int_jit_compile() doesn't need
to rely on internal implementation of bpf_jit_binary_alloc().
On x86 gcc defaults to 16-byte alignment for regular kernel functions
due to better performance. JITed code may be aligned to 16 in the future,
but it will use 4 in the meantime.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
  • Loading branch information
Alexei Starovoitov authored and borkmann committed Oct 17, 2019
1 parent 2a02759 commit 3dec541
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 5 deletions.
97 changes: 93 additions & 4 deletions arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>

#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>

Expand Down Expand Up @@ -123,6 +123,19 @@ static const int reg2hex[] = {
[AUX_REG] = 3, /* R11 temp register */
};

static const int reg2pt_regs[] = {
[BPF_REG_0] = offsetof(struct pt_regs, ax),
[BPF_REG_1] = offsetof(struct pt_regs, di),
[BPF_REG_2] = offsetof(struct pt_regs, si),
[BPF_REG_3] = offsetof(struct pt_regs, dx),
[BPF_REG_4] = offsetof(struct pt_regs, cx),
[BPF_REG_5] = offsetof(struct pt_regs, r8),
[BPF_REG_6] = offsetof(struct pt_regs, bx),
[BPF_REG_7] = offsetof(struct pt_regs, r13),
[BPF_REG_8] = offsetof(struct pt_regs, r14),
[BPF_REG_9] = offsetof(struct pt_regs, r15),
};

/*
* is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
* which need extra byte of encoding.
Expand Down Expand Up @@ -377,14 +390,27 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}


static bool ex_handler_bpf(const struct exception_table_entry *x,
struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr)
{
u32 reg = x->fixup >> 8;

/* jump over faulting load and clear dest register */
*(unsigned long *)((void *)regs + reg) = 0;
regs->ip += x->fixup & 0xff;
return true;
}

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
struct bpf_insn *insn = bpf_prog->insnsi;
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0;
int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;

Expand Down Expand Up @@ -778,21 +804,25 @@ stx: if (is_imm8(insn->off))

/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* Emit 'movzx rax, byte ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* Emit 'movzx rax, word ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* Emit 'mov eax, dword ptr [rax+0x14]' */
if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
else
EMIT1(0x8B);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/* Emit 'mov rax, qword ptr [rax+0x14]' */
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx: /*
Expand All @@ -805,6 +835,48 @@ stx: if (is_imm8(insn->off))
else
EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
insn->off);
if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
struct exception_table_entry *ex;
u8 *_insn = image + proglen;
s64 delta;

if (!bpf_prog->aux->extable)
break;

if (excnt >= bpf_prog->aux->num_exentries) {
pr_err("ex gen bug\n");
return -EFAULT;
}
ex = &bpf_prog->aux->extable[excnt++];

delta = _insn - (u8 *)&ex->insn;
if (!is_simm32(delta)) {
pr_err("extable->insn doesn't fit into 32-bit\n");
return -EFAULT;
}
ex->insn = delta;

delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
if (!is_simm32(delta)) {
pr_err("extable->handler doesn't fit into 32-bit\n");
return -EFAULT;
}
ex->handler = delta;

if (dst_reg > BPF_REG_9) {
pr_err("verifier error\n");
return -EFAULT;
}
/*
* Compute size of x86 insn and its target dest x86 register.
* ex_handler_bpf() will use lower 8 bits to adjust
* pt_regs->ip to jump over this x86 instruction
* and upper bits to figure out which pt_regs to zero out.
* End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
* of 4 bytes will be ignored and rbx will be zero inited.
*/
ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
}
break;

/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
Expand Down Expand Up @@ -1058,6 +1130,11 @@ xadd: if (is_imm8(insn->off))
addrs[i] = proglen;
prog = temp;
}

if (image && excnt != bpf_prog->aux->num_exentries) {
pr_err("extable is not populated\n");
return -EFAULT;
}
return proglen;
}

Expand Down Expand Up @@ -1158,12 +1235,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
break;
}
if (proglen == oldproglen) {
header = bpf_jit_binary_alloc(proglen, &image,
1, jit_fill_hole);
/*
* The number of entries in extable is the number of BPF_LDX
* insns that access kernel memory via "pointer to BTF type".
* The verifier changed their opcode from LDX|MEM|size
* to LDX|PROBE_MEM|size to make JITing easier.
*/
u32 align = __alignof__(struct exception_table_entry);
u32 extable_size = prog->aux->num_exentries *
sizeof(struct exception_table_entry);

/* allocate module memory for x86 insns and extable */
header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
&image, align, jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
}
prog->aux->extable = (void *) image + roundup(proglen, align);
}
oldproglen = proglen;
cond_resched();
Expand Down
3 changes: 3 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ struct sock;
struct seq_file;
struct btf;
struct btf_type;
struct exception_table_entry;

extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
Expand Down Expand Up @@ -423,6 +424,8 @@ struct bpf_prog_aux {
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
u32 num_exentries;
struct exception_table_entry *extable;
struct bpf_prog_stats __percpu *stats;
union {
struct work_struct work;
Expand Down
10 changes: 10 additions & 0 deletions include/linux/extable.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,14 @@ search_module_extables(unsigned long addr)
}
#endif /*CONFIG_MODULES*/

#ifdef CONFIG_BPF_JIT
const struct exception_table_entry *search_bpf_extables(unsigned long addr);
#else
static inline const struct exception_table_entry *
search_bpf_extables(unsigned long addr)
{
return NULL;
}
#endif

#endif /* _LINUX_EXTABLE_H */
20 changes: 19 additions & 1 deletion kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>

#include <linux/extable.h>
#include <asm/unaligned.h>

/* Registers */
Expand Down Expand Up @@ -712,6 +712,24 @@ bool is_bpf_text_address(unsigned long addr)
return ret;
}

const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
const struct exception_table_entry *e = NULL;
struct bpf_prog *prog;

rcu_read_lock();
prog = bpf_prog_kallsyms_find(addr);
if (!prog)
goto out;
if (!prog->aux->num_exentries)
goto out;

e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
out:
rcu_read_unlock();
return e;
}

int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym)
{
Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -8729,6 +8729,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
return -EINVAL;
}
insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
env->prog->aux->num_exentries++;
continue;
default:
continue;
Expand Down
2 changes: 2 additions & 0 deletions kernel/extable.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
e = search_kernel_exception_table(addr);
if (!e)
e = search_module_extables(addr);
if (!e)
e = search_bpf_extables(addr);
return e;
}

Expand Down

0 comments on commit 3dec541

Please sign in to comment.