x86/bhi: Add support for clearing branch history at syscall entry

Branch History Injection (BHI) attacks may allow a malicious application to
influence indirect branch prediction in kernel by poisoning the branch
history. eIBRS isolates indirect branch targets in ring0.  The BHB can
still influence the choice of indirect branch predictor entry, and although
branch predictor entries are isolated between modes when eIBRS is enabled,
the BHB itself is not isolated between modes.

Alder Lake and new processors supports a hardware control BHI_DIS_S to
mitigate BHI.  For older processors Intel has released a software sequence
to clear the branch history on parts that don't support BHI_DIS_S. Add
support to execute the software sequence at syscall entry and VMexit to
overwrite the branch history.

For now, branch history is not cleared at interrupt entry, as malicious
applications are not believed to have sufficient control over the
registers, since previous register state is cleared at interrupt
entry. Researchers continue to poke at this area and it may become
necessary to clear at interrupt entry as well in the future.

This mitigation is only defined here. It is enabled later.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Co-developed-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
This commit is contained in:
Pawan Gupta 2024-03-11 08:56:58 -07:00 committed by Thomas Gleixner
parent 1e3ad78334
commit 7390db8aea
7 changed files with 96 additions and 3 deletions

View File

@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
} }
/** /**
* int80_emulation - 32-bit legacy syscall entry * do_int80_emulation - 32-bit legacy syscall C entry from asm
* *
* This entry point can be used by 32-bit and 64-bit programs to perform * This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in * 32-bit system calls. Instances of INT $0x80 can be found inline in
@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number * eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/ */
DEFINE_IDTENTRY_RAW(int80_emulation) __visible noinstr void do_int80_emulation(struct pt_regs *regs)
{ {
int nr; int nr;

View File

@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */ /* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER IBRS_ENTER
UNTRAIN_RET UNTRAIN_RET
CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */ call do_syscall_64 /* returns with IRQs disabled */
@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead) SYM_CODE_END(rewind_stack_and_make_dead)
.popsection .popsection
/*
* This sequence executes branches in order to remove user branch information
* from the branch history tracker in the Branch Predictor, therefore removing
* user influence on subsequent BTB lookups.
*
* It should be used on parts prior to Alder Lake. Newer parts should use the
* BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
* virtualized on newer hardware the VMM should protect against BHI attacks by
* setting BHI_DIS_S for the guests.
*
* CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
* and not clearing the branch history. The call tree looks like:
*
* call 1
* call 2
* call 2
* call 2
* call 2
* call 2
* ret
* ret
* ret
* ret
* ret
* ret
*
* This means that the stack is non-constant and ORC can't unwind it with %rsp
* alone. Therefore we unconditionally set up the frame pointer, which allows
* ORC to unwind properly.
*
* The alignment is for performance and not for safety, and may be safely
* refactored in the future if needed.
*/
SYM_FUNC_START(clear_bhb_loop)
push %rbp
mov %rsp, %rbp
movl $5, %ecx
ANNOTATE_INTRA_FUNCTION_CALL
call 1f
jmp 5f
.align 64, 0xcc
ANNOTATE_INTRA_FUNCTION_CALL
1: call 2f
RET
.align 64, 0xcc
2: movl $5, %eax
3: jmp 4f
nop
4: sub $1, %eax
jnz 3b
sub $1, %ecx
jnz 1b
RET
5: lfence
pop %rbp
RET
SYM_FUNC_END(clear_bhb_loop)
EXPORT_SYMBOL_GPL(clear_bhb_loop)
STACK_FRAME_NON_STANDARD(clear_bhb_loop)

View File

@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER IBRS_ENTER
UNTRAIN_RET UNTRAIN_RET
CLEAR_BRANCH_HISTORY
/* /*
* SYSENTER doesn't filter flags, so we need to clear NT and AC * SYSENTER doesn't filter flags, so we need to clear NT and AC
@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER IBRS_ENTER
UNTRAIN_RET UNTRAIN_RET
CLEAR_BRANCH_HISTORY
movq %rsp, %rdi movq %rsp, %rdi
call do_fast_syscall_32 call do_fast_syscall_32
@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR ANNOTATE_NOENDBR
int3 int3
SYM_CODE_END(entry_SYSCALL_compat) SYM_CODE_END(entry_SYSCALL_compat)
/*
* int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
* point to C routines, however since this is a system call interface the branch
* history needs to be scrubbed to protect against BHI attacks, and that
* scrubbing needs to take place in assembly code prior to entering any C
* routines.
*/
SYM_CODE_START(int80_emulation)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
CLEAR_BRANCH_HISTORY
jmp do_int80_emulation
SYM_CODE_END(int80_emulation)

View File

@ -461,11 +461,12 @@
/* /*
* Extended auxiliary flags: Linux defined - for features scattered in various * Extended auxiliary flags: Linux defined - for features scattered in various
* CPUID levels like 0x80000022, etc. * CPUID levels like 0x80000022, etc and Linux defined features.
* *
* Reuse free bits when adding new feature flags! * Reuse free bits when adding new feature flags!
*/ */
#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
/* /*
* BUG word(s) * BUG word(s)

View File

@ -326,6 +326,14 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm .endm
#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
.endm
#else
#define CLEAR_BRANCH_HISTORY
#endif
#else /* __ASSEMBLY__ */ #else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \ #define ANNOTATE_RETPOLINE_SAFE \
@ -368,6 +376,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void); extern void entry_untrain_ret(void);
extern void entry_ibpb(void); extern void entry_ibpb(void);
#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
#endif
extern void (*x86_return_thunk)(void); extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void); extern void __warn_thunk(void);

View File

@ -125,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
} }
bool do_syscall_64(struct pt_regs *regs, int nr); bool do_syscall_64(struct pt_regs *regs, int nr);
void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */

View File

@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
call vmx_spec_ctrl_restore_host call vmx_spec_ctrl_restore_host
CLEAR_BRANCH_HISTORY
/* Put return value in AX */ /* Put return value in AX */
mov %_ASM_BX, %_ASM_AX mov %_ASM_BX, %_ASM_AX