Skip to content

Commit

Permalink
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linu…
Browse files Browse the repository at this point in the history
…x/kernel/git/tip/tip

Pull x86 fpu updates from Ingo Molnar:
 "The main x86 FPU changes in this cycle were:

   - a large series of cleanups, fixes and enhancements to re-enable the
     XSAVES instruction on Intel CPUs - which is the most advanced
     instruction to do FPU context switches (Yu-cheng Yu, Fenghua Yu)

   - Add FPU tracepoints for the FPU state machine (Dave Hansen)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu: Do not BUG_ON() in early FPU code
  x86/fpu/xstate: Re-enable XSAVES
  x86/fpu/xstate: Fix fpstate_init() for XRSTORS
  x86/fpu/xstate: Return NULL for disabled xstate component address
  x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES
  x86/fpu/xstate: Fix xstate_offsets, xstate_sizes for non-extended xstates
  x86/fpu/xstate: Fix XSTATE component offset print out
  x86/fpu/xstate: Fix PTRACE frames for XSAVES
  x86/fpu/xstate: Fix supervisor xstate component offset
  x86/fpu/xstate: Align xstate components according to CPUID
  x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use
  x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization
  x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size'
  x86/fpu/xstate: Define and use 'fpu_user_xstate_size'
  x86/fpu: Add tracepoints to dump FPU state at key points
  • Loading branch information
torvalds committed Jul 26, 2016
2 parents 36e635c + ec3ed4a commit 2d724ff
Show file tree
Hide file tree
Showing 11 changed files with 608 additions and 158 deletions.
5 changes: 5 additions & 0 deletions arch/x86/include/asm/fpu/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>
#include <asm/trace/fpu.h>

/*
* High level FPU state handling functions:
Expand Down Expand Up @@ -524,6 +525,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)

fpu->fpregs_active = 0;
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
}

/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
Expand All @@ -533,6 +535,7 @@ static inline void __fpregs_activate(struct fpu *fpu)

fpu->fpregs_active = 1;
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
}

/*
Expand Down Expand Up @@ -604,11 +607,13 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)

/* But leave fpu_fpregs_owner_ctx! */
old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);

/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
new_fpu->counter++;
__fpregs_activate(new_fpu);
trace_x86_fpu_regs_activated(new_fpu);
prefetch(&new_fpu->state);
} else {
__fpregs_deactivate_hw();
Expand Down
7 changes: 7 additions & 0 deletions arch/x86/include/asm/fpu/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ enum xfeature {
#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)

#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
Expand Down Expand Up @@ -230,6 +231,12 @@ struct xstate_header {
u64 reserved[6];
} __attribute__((packed));

/*
* xstate_header.xcomp_bv[63] indicates that the extended_state_area
* is in compacted format.
*/
#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63)

/*
* This is our most modern FPU state format, as saved by the XSAVE
* and restored by the XRSTOR instructions.
Expand Down
10 changes: 8 additions & 2 deletions arch/x86/include/asm/fpu/xstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
#define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)

/* Supervisor features */
#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)

/* Supported features which support lazy state saving */
#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
Expand All @@ -39,7 +42,6 @@
#define REX_PREFIX
#endif

extern unsigned int xstate_size;
extern u64 xfeatures_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];

Expand All @@ -48,5 +50,9 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
const void *get_xsave_field_ptr(int xstate_field);

int using_compacted_format(void);
int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
void __user *ubuf, struct xregs_state *xsave);
int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
struct xregs_state *xsave);
#endif
3 changes: 2 additions & 1 deletion arch/x86/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,8 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */

extern unsigned int xstate_size;
extern unsigned int fpu_kernel_xstate_size;
extern unsigned int fpu_user_xstate_size;

struct perf_event;

Expand Down
119 changes: 119 additions & 0 deletions arch/x86/include/asm/trace/fpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM x86_fpu

#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FPU_H

#include <linux/tracepoint.h>

DECLARE_EVENT_CLASS(x86_fpu,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu),

TP_STRUCT__entry(
__field(struct fpu *, fpu)
__field(bool, fpregs_active)
__field(bool, fpstate_active)
__field(int, counter)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),

TP_fast_assign(
__entry->fpu = fpu;
__entry->fpregs_active = fpu->fpregs_active;
__entry->fpstate_active = fpu->fpstate_active;
__entry->counter = fpu->counter;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
__entry->fpregs_active,
__entry->fpstate_active,
__entry->counter,
__entry->xfeatures,
__entry->xcomp_bv
)
);

DEFINE_EVENT(x86_fpu, x86_fpu_state,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_after_save,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_before_restore,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_after_restore,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_dropped,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_copy_src,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH asm/trace/
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE fpu
#endif /* _TRACE_FPU_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
33 changes: 30 additions & 3 deletions arch/x86/kernel/fpu/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@
#include <asm/fpu/internal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>

#include <linux/hardirq.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>

/*
* Represents the initial FPU state. It's mostly (but not completely) zeroes,
* depending on the FPU hardware format:
Expand Down Expand Up @@ -192,6 +196,7 @@ void fpu__save(struct fpu *fpu)
WARN_ON_FPU(fpu != &current->thread.fpu);

preempt_disable();
trace_x86_fpu_before_save(fpu);
if (fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(fpu)) {
if (use_eager_fpu())
Expand All @@ -200,6 +205,7 @@ void fpu__save(struct fpu *fpu)
fpregs_deactivate(fpu);
}
}
trace_x86_fpu_after_save(fpu);
preempt_enable();
}
EXPORT_SYMBOL_GPL(fpu__save);
Expand All @@ -222,7 +228,14 @@ void fpstate_init(union fpregs_state *state)
return;
}

memset(state, 0, xstate_size);
memset(state, 0, fpu_kernel_xstate_size);

/*
* XRSTORS requires that this bit is set in xcomp_bv, or
* it will #GP. Make sure it is replaced after the memset().
*/
if (static_cpu_has(X86_FEATURE_XSAVES))
state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;

if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
Expand All @@ -247,7 +260,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* leak into the child task:
*/
if (use_eager_fpu())
memset(&dst_fpu->state.xsave, 0, xstate_size);
memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);

/*
* Save current FPU registers directly into the child
Expand All @@ -266,7 +279,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
*/
preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
memcpy(&src_fpu->state, &dst_fpu->state,
fpu_kernel_xstate_size);

if (use_eager_fpu())
copy_kernel_to_fpregs(&src_fpu->state);
Expand All @@ -275,6 +289,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
}
preempt_enable();

trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);

return 0;
}

Expand All @@ -288,7 +305,9 @@ void fpu__activate_curr(struct fpu *fpu)

if (!fpu->fpstate_active) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);

trace_x86_fpu_activate_state(fpu);
/* Safe to do for the current task: */
fpu->fpstate_active = 1;
}
Expand All @@ -314,7 +333,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
} else {
if (!fpu->fpstate_active) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);

trace_x86_fpu_activate_state(fpu);
/* Safe to do for current and for stopped child tasks: */
fpu->fpstate_active = 1;
}
Expand Down Expand Up @@ -347,7 +368,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
fpu->last_cpu = -1;
} else {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);

trace_x86_fpu_activate_state(fpu);
/* Safe to do for stopped child tasks: */
fpu->fpstate_active = 1;
}
Expand Down Expand Up @@ -432,9 +455,11 @@ void fpu__restore(struct fpu *fpu)

/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
kernel_fpu_disable();
trace_x86_fpu_before_restore(fpu);
fpregs_activate(fpu);
copy_kernel_to_fpregs(&fpu->state);
fpu->counter++;
trace_x86_fpu_after_restore(fpu);
kernel_fpu_enable();
}
EXPORT_SYMBOL_GPL(fpu__restore);
Expand Down Expand Up @@ -463,6 +488,8 @@ void fpu__drop(struct fpu *fpu)

fpu->fpstate_active = 0;

trace_x86_fpu_dropped(fpu);

preempt_enable();
}

Expand Down
Loading

0 comments on commit 2d724ff

Please sign in to comment.