Skip to content

Commit

Permalink
x86/entry/64: Use a per-CPU trampoline stack for IDT entries
Browse files Browse the repository at this point in the history
Historically, IDT entries from usermode have always gone directly
to the running task's kernel stack.  Rearrange it so that we enter on
a per-CPU trampoline stack and then manually switch to the task's stack.
This touches a couple of extra cachelines, but it gives us a chance
to run some code before we touch the kernel stack.

The asm isn't exactly beautiful, but I think that fully refactoring
it can wait.

Signed-off-by: Andy Lutomirski <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Borislav Petkov <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Laight <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Eduardo Valentin <[email protected]>
Cc: Greg KH <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
amluto authored and Ingo Molnar committed Dec 17, 2017
1 parent 6d9256f commit 7f2590a
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 32 deletions.
67 changes: 50 additions & 17 deletions arch/x86/entry/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,13 @@ END(irq_entries_start)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
cld

testb $3, CS-ORIG_RAX(%rsp)
jz 1f
SWAPGS
call switch_to_thread_stack
1:

ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
Expand All @@ -569,12 +576,8 @@ END(irq_entries_start)
jz 1f

/*
* IRQ from user mode. Switch to kernel gsbase and inform context
* tracking that we're in kernel mode.
*/
SWAPGS

/*
* IRQ from user mode.
*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
* (which can take locks). Since TRACE_IRQS_OFF idempotent,
Expand Down Expand Up @@ -828,6 +831,32 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)

/*
* Switch to the thread stack. This is called with the IRET frame and
* orig_ax on the stack. (That is, RDI..R12 are not on the stack and
* space has not been allocated for them.)
*/
ENTRY(switch_to_thread_stack)
UNWIND_HINT_FUNC

pushq %rdi
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI

pushq 7*8(%rdi) /* regs->ss */
pushq 6*8(%rdi) /* regs->rsp */
pushq 5*8(%rdi) /* regs->eflags */
pushq 4*8(%rdi) /* regs->cs */
pushq 3*8(%rdi) /* regs->ip */
pushq 2*8(%rdi) /* regs->orig_ax */
pushq 8(%rdi) /* return address */
UNWIND_HINT_FUNC

movq (%rdi), %rdi
ret
END(switch_to_thread_stack)

.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
Expand All @@ -845,11 +874,12 @@ ENTRY(\sym)

ALLOC_PT_GPREGS_ON_STACK

.if \paranoid
.if \paranoid == 1
.if \paranoid < 2
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
jnz 1f
jnz .Lfrom_usermode_switch_stack_\@
.endif

.if \paranoid
call paranoid_entry
.else
call error_entry
Expand Down Expand Up @@ -891,20 +921,15 @@ ENTRY(\sym)
jmp error_exit
.endif

.if \paranoid == 1
.if \paranoid < 2
/*
* Paranoid entry from userspace. Switch stacks and treat it
* Entry from userspace. Switch stacks and treat it
* as a normal entry. This means that paranoid handlers
* run in real process context if user_mode(regs).
*/
1:
.Lfrom_usermode_switch_stack_\@:
call error_entry


movq %rsp, %rdi /* pt_regs pointer */
call sync_regs
movq %rax, %rsp /* switch stack */

movq %rsp, %rdi /* pt_regs pointer */

.if \has_error_code
Expand Down Expand Up @@ -1165,6 +1190,14 @@ ENTRY(error_entry)
SWAPGS

.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
popq %r12 /* save return addr in %12 */
movq %rsp, %rdi /* arg0 = pt_regs pointer */
call sync_regs
movq %rax, %rsp /* switch stack */
ENCODE_FRAME_POINTER
pushq %r12

/*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
Expand Down
5 changes: 4 additions & 1 deletion arch/x86/entry/entry_64_compat.S
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax

/* Construct struct pt_regs on stack (iret frame is already on stack) */
pushq %rax /* pt_regs->orig_ax */

/* switch to thread stack expects orig_ax to be pushed */
call switch_to_thread_stack

pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
Expand Down
4 changes: 3 additions & 1 deletion arch/x86/include/asm/switch_to.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
/* This is used when switching tasks or entering/exiting vm86 mode. */
static inline void update_sp0(struct task_struct *task)
{
/* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
#ifdef CONFIG_X86_32
load_sp0(task->thread.sp0);
#else
load_sp0(task_top_of_stack(task));
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
#endif
}

Expand Down
1 change: 0 additions & 1 deletion arch/x86/include/asm/traps.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
dotraplinkage void do_stack_segment(struct pt_regs *, long);
#ifdef CONFIG_X86_64
dotraplinkage void do_double_fault(struct pt_regs *, long);
asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
Expand Down
6 changes: 4 additions & 2 deletions arch/x86/kernel/cpu/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1623,11 +1623,13 @@ void cpu_init(void)
setup_cpu_entry_area(cpu);

/*
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
* Initialize the TSS. sp0 points to the entry trampoline stack
* regardless of what task is running.
*/
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
offsetofend(struct tss_struct, SYSENTER_stack));

load_mm_ldt(&init_mm);

Expand Down
21 changes: 11 additions & 10 deletions arch/x86/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -619,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3);

#ifdef CONFIG_X86_64
/*
* Help handler running on IST stack to switch off the IST stack if the
* interrupted code was in user mode. The actual stack switch is done in
* entry_64.S
* Help handler running on a per-cpu (IST or entry trampoline) stack
* to switch to the normal thread stack if the interrupted code was in
* user mode. The actual stack switch is done in entry_64.S
*/
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = task_pt_regs(current);
*regs = *eregs;
struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
if (regs != eregs)
*regs = *eregs;
return regs;
}
NOKPROBE_SYMBOL(sync_regs);
Expand All @@ -642,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/*
* This is called from entry_64.S early in handling a fault
* caused by a bad iret to user mode. To handle the fault
* correctly, we want move our stack frame to task_pt_regs
* and we want to pretend that the exception came from the
* iret target.
* correctly, we want to move our stack frame to where it would
* be had we entered directly on the entry stack (rather than
* just below the IRET frame) and we want to pretend that the
* exception came from the IRET target.
*/
struct bad_iret_stack *new_stack =
container_of(task_pt_regs(current),
struct bad_iret_stack, regs);
(struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;

/* Copy the IRET target to the new stack. */
memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
Expand Down

0 comments on commit 7f2590a

Please sign in to comment.