Merge tag 'nativebhi' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mitigations from Thomas Gleixner:
 "Mitigations for the native BHI hardware vulnerabilty:

  Branch History Injection (BHI) attacks may allow a malicious
  application to influence indirect branch prediction in the kernel by
  poisoning the branch history. eIBRS isolates indirect branch targets
  in ring0. The BHB can still influence the choice of indirect branch
  predictor entry, and although branch predictor entries are isolated
  between modes when eIBRS is enabled, the BHB itself is not isolated
  between modes.

  Add mitigations against it either with the help of microcode or with
  software sequences for the affected CPUs"

[ This also ends up enabling the full mitigation by default despite the
  system call hardening, because apparently there are other indirect
  calls that are still sufficiently reachable, and the 'auto' case just
  isn't hardened enough.

  We'll have some more inevitable tweaking in the future  - Linus ]

* tag 'nativebhi' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM: x86: Add BHI_NO
  x86/bhi: Mitigate KVM by default
  x86/bhi: Add BHI mitigation knob
  x86/bhi: Enumerate Branch History Injection (BHI) bug
  x86/bhi: Define SPEC_CTRL_BHI_DIS_S
  x86/bhi: Add support for clearing branch history at syscall entry
  x86/syscall: Don't force use of indirect calls for system calls
  x86/bugs: Change commas to semicolons in 'spectre_v2' sysfs file
commit 2bb69f5fc7
Author: Linus Torvalds
Date:   2024-04-08 20:07:51 -07:00

19 changed files with 371 additions and 49 deletions

Documentation/admin-guide/hw-vuln/spectre.rst

@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
 the BHB might be shared across privilege levels even in the presence of
 Enhanced IBRS.
 
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
 
 Attack scenarios
 ----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
  'PBRSB-eIBRS: Not affected'  CPU is not affected by PBRSB
  ===========================  =======================================================
 
+- Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+   - System is not affected
+ * - BHI: Retpoline
+   - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+   - System is protected by BHI_DIS_S
+ * - BHI: SW loop; KVM SW loop
+   - System is protected by software clearing sequence
+ * - BHI: Syscall hardening
+   - Syscalls are hardened against BHI
+ * - BHI: Syscall hardening; KVM: SW loop
+   - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence
+
 Full mitigation might require a microcode update from the CPU
 vendor. When the necessary microcode is not available, the kernel will
 report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2
 
 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
 boot, by setting the IBRS bit, and they're automatically protected against
-Spectre v2 variant attacks.
+some Spectre v2 variant attacks. The BHB can still influence the choice of
+indirect branch predictor entry, and although branch predictor entries are
+isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+between modes. Systems which support BHI_DIS_S will set it to protect against
+BHI attacks.
 
 On Intel's enhanced IBRS systems, this includes cross-thread branch target
 injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,22 @@ kernel command line.
                spectre_v2=off. Spectre variant 1 mitigations
                cannot be disabled.
 
+        spectre_bhi=
+
+                [X86] Control mitigation of Branch History Injection
+                (BHI) vulnerability. Syscalls are hardened against BHI
+                regardless of this setting. This setting affects the deployment
+                of the HW BHI control and the SW BHB clearing sequence.
+
+                on
+                        unconditionally enable.
+                off
+                        unconditionally disable.
+                auto
+                        enable if hardware mitigation
+                        control(BHI_DIS_S) is available, otherwise
+                        enable alternate mitigation in KVM.
+
 For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
 
 Mitigation selection guide
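
For illustration (not part of this commit), the new "BHI:" field described
above can be read from the spectre_v2 sysfs file. A minimal C sketch, assuming
the standard sysfs path:

    /* Illustrative only: print the BHI portion of the spectre_v2 status.
     * After this commit the fields are separated by "; ". */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char line[512];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

        if (!f || !fgets(line, sizeof(line), f)) {
            perror("spectre_v2");
            return 1;
        }
        fclose(f);

        char *bhi = strstr(line, "BHI:");  /* e.g. "; BHI: BHI_DIS_S" */
        printf("%s", bhi ? bhi : "no BHI field reported\n");
        return 0;
    }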

Documentation/admin-guide/kernel-parameters.txt

@@ -6063,6 +6063,18 @@
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                        See Documentation/admin-guide/laptops/sonypi.rst
 
+       spectre_bhi=    [X86] Control mitigation of Branch History Injection
+                       (BHI) vulnerability. Syscalls are hardened against BHI
+                       regardless of this setting. This setting affects the
+                       deployment of the HW BHI control and the SW BHB
+                       clearing sequence.
+
+                       on   - unconditionally enable.
+                       off  - unconditionally disable.
+                       auto - (default) enable hardware mitigation
+                              (BHI_DIS_S) if available, otherwise enable
+                              alternate mitigation in KVM.
+
        spectre_v2=     [X86,EARLY] Control mitigation of Spectre variant 2
                        (indirect branch speculation) vulnerability.
                        The default operation protects the kernel from
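
As a usage illustration (not part of the commit), the knob is passed on the
kernel command line like any other mitigation parameter, e.g. appended to the
bootloader's kernel line:

    spectre_bhi=off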

arch/x86/Kconfig

@@ -2633,6 +2633,32 @@ config MITIGATION_RFDS
          stored in floating point, vector and integer registers.
          See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
 
+choice
+       prompt "Clear branch history"
+       depends on CPU_SUP_INTEL
+       default SPECTRE_BHI_ON
+       help
+         Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+         where the branch history buffer is poisoned to speculatively steer
+         indirect branches.
+         See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
+config SPECTRE_BHI_ON
+       bool "on"
+       help
+         Equivalent to setting spectre_bhi=on command line parameter.
+
+config SPECTRE_BHI_OFF
+       bool "off"
+       help
+         Equivalent to setting spectre_bhi=off command line parameter.
+
+config SPECTRE_BHI_AUTO
+       bool "auto"
+       depends on BROKEN
+       help
+         Equivalent to setting spectre_bhi=auto command line parameter.
+
+endchoice
+
 endif
 
 config ARCH_HAS_ADD_PAGES
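
For illustration, taking the default choice above would leave a .config
fragment like the following (option names as added by this commit; "auto"
is not selectable while it depends on BROKEN):

    CONFIG_SPECTRE_BHI_ON=y
    # CONFIG_SPECTRE_BHI_OFF is not set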

arch/x86/entry/common.c

@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
 
 	if (likely(unr < NR_syscalls)) {
 		unr = array_index_nospec(unr, NR_syscalls);
-		regs->ax = sys_call_table[unr](regs);
+		regs->ax = x64_sys_call(regs, unr);
 		return true;
 	}
 	return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 
 	if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
 		xnr = array_index_nospec(xnr, X32_NR_syscalls);
-		regs->ax = x32_sys_call_table[xnr](regs);
+		regs->ax = x32_sys_call(regs, xnr);
 		return true;
 	}
 	return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
 
 	if (likely(unr < IA32_NR_syscalls)) {
 		unr = array_index_nospec(unr, IA32_NR_syscalls);
-		regs->ax = ia32_sys_call_table[unr](regs);
+		regs->ax = ia32_sys_call(regs, unr);
 	} else if (nr != -1) {
 		regs->ax = __ia32_sys_ni_syscall(regs);
 	}
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
 }
 
 /**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
  *
  * This entry point can be used by 32-bit and 64-bit programs to perform
  * 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
 * eax: system call number
 * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
 */
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
 {
 	int nr;
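
The change above swaps a retpoline-guarded indirect call through
sys_call_table[] for a compiler-generated switch of direct calls. A
standalone sketch of the technique, with hypothetical syscall stubs (with
GCC's -mindirect-branch=thunk, the table form compiles to an indirect call
through a retpoline thunk; the switch form needs no indirect branch):

    #include <stddef.h>

    struct pt_regs;
    static long sys_read_stub(struct pt_regs *r)  { (void)r; return 0; }
    static long sys_write_stub(struct pt_regs *r) { (void)r; return 1; }
    static long sys_ni(struct pt_regs *r)         { (void)r; return -38; /* -ENOSYS */ }

    typedef long (*sys_call_ptr_t)(struct pt_regs *);
    static const sys_call_ptr_t table[] = { sys_read_stub, sys_write_stub };

    /* Old style: indirect call, steered by the (poisonable) indirect predictor */
    static long dispatch_table(struct pt_regs *regs, unsigned int nr)
    {
        return nr < 2 ? table[nr](regs) : sys_ni(regs);
    }

    /* New style: switch selects direct calls, no indirect branch required */
    static long dispatch_switch(struct pt_regs *regs, unsigned int nr)
    {
        switch (nr) {
        case 0: return sys_read_stub(regs);
        case 1: return sys_write_stub(regs);
        default: return sys_ni(regs);
        }
    }

    int main(void)
    {
        return (int)(dispatch_table(NULL, 0) + dispatch_switch(NULL, 1));
    }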

arch/x86/entry/entry_64.S

@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	/* clobbers %rax, make sure it is after saving the syscall nr */
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	call	do_syscall_64		/* returns with IRQs disabled */
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
 	call	make_task_dead
 SYM_CODE_END(rewind_stack_and_make_dead)
 .popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ *   call 2
+ *     call 2
+ *       call 2
+ *         call 2
+ *           call 2
+ *           ret
+ *         ret
+ *       ret
+ *     ret
+ *   ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+	push	%rbp
+	mov	%rsp, %rbp
+	movl	$5, %ecx
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call	1f
+	jmp	5f
+	.align 64, 0xcc
+	ANNOTATE_INTRA_FUNCTION_CALL
+1:	call	2f
+	RET
+	.align 64, 0xcc
+2:	movl	$5, %eax
+3:	jmp	4f
+	nop
+4:	sub	$1, %eax
+	jnz	3b
+	sub	$1, %ecx
+	jnz	1b
+	RET
+5:	lfence
+	pop	%rbp
+	RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
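
To make the shape of the sequence concrete, here is an illustrative C model
of the two 5-deep counters (%ecx outer, %eax inner) in the assembly above.
It is NOT a mitigation: a compiler is free to restructure branches, which is
exactly why the real sequence must stay in assembly:

    /* Illustrative model only: 5 CALL/RET pairs, each running 5 taken
     * backward branches, mirroring movl $5, %ecx / movl $5, %eax. */
    static void inner(void)
    {
        for (volatile int i = 5; i; i--)
            ; /* each iteration models a taken branch in the asm */
    }

    void clear_bhb_model(void)
    {
        for (volatile int j = 5; j; j--)
            inner(); /* CALL/RET keeps the Loop Stream Detector from engaging */
    }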

arch/x86/entry/entry_64_compat.S

@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
 
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	/*
 	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
 
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
 	int3
 SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+	ANNOTATE_NOENDBR
+	UNWIND_HINT_FUNC
+	CLEAR_BRANCH_HISTORY
+	jmp	do_int80_emulation
+SYM_CODE_END(int80_emulation)

arch/x86/entry/syscall_32.c

@@ -18,8 +18,25 @@
 #include <asm/syscalls_32.h>
 #undef __SYSCALL
 
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
 #define __SYSCALL(nr, sym) __ia32_##sym,
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_32.h>
 };
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+	switch (nr) {
+	#include <asm/syscalls_32.h>
+	default: return __ia32_sys_ni_syscall(regs);
+	}
+};

arch/x86/entry/syscall_64.c

@@ -11,8 +11,23 @@
 #include <asm/syscalls_64.h>
 #undef __SYSCALL
 
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
 #define __SYSCALL(nr, sym) __x64_##sym,
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_64.h>
 };
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+	switch (nr) {
+	#include <asm/syscalls_64.h>
+	default: return __x64_sys_ni_syscall(regs);
+	}
+};
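
The pattern above is an x-macro: the same generated header expands once into
a table and once into switch cases by redefining __SYSCALL between uses. A
self-contained sketch, with a hypothetical two-entry list standing in for the
generated asm/syscalls_64.h:

    #include <stdio.h>

    #define SYSCALL_LIST \
        __SYSCALL(0, hello) \
        __SYSCALL(1, world)

    static long sys_hello(void) { return 100; }
    static long sys_world(void) { return 200; }
    static long sys_ni(void)    { return -1; }

    /* Expansion 1: a function-pointer table */
    #define __SYSCALL(nr, sym) [nr] = sys_##sym,
    static long (*const call_table[])(void) = { SYSCALL_LIST };
    #undef __SYSCALL

    /* Expansion 2: direct-call switch cases */
    #define __SYSCALL(nr, sym) case nr: return sys_##sym();
    static long do_call(unsigned int nr)
    {
        switch (nr) {
        SYSCALL_LIST
        default: return sys_ni();
        }
    }
    #undef __SYSCALL

    int main(void)
    {
        printf("%ld %ld %ld\n", call_table[0](), do_call(1), do_call(7));
        return 0;
    }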

arch/x86/entry/syscall_x32.c

@@ -11,8 +11,12 @@
 #include <asm/syscalls_x32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) __x64_##sym,
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+	switch (nr) {
+	#include <asm/syscalls_x32.h>
+	default: return __x64_sys_ni_syscall(regs);
+	}
 };

arch/x86/include/asm/cpufeatures.h

@@ -461,11 +461,15 @@
 
 /*
  * Extended auxiliary flags: Linux defined - for features scattered in various
- * CPUID levels like 0x80000022, etc.
+ * CPUID levels like 0x80000022, etc and Linux defined features.
  *
  * Reuse free bits when adding new feature flags!
  */
 #define X86_FEATURE_AMD_LBR_PMC_FREEZE	(21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP	(21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL		(21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
 
 /*
  * BUG word(s)
@@ -515,4 +519,5 @@
 #define X86_BUG_SRSO			X86_BUG(1*32 + 0) /* AMD SRSO bug */
 #define X86_BUG_DIV0			X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #define X86_BUG_RFDS			X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI			X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
 
 #endif /* _ASM_X86_CPUFEATURES_H */

arch/x86/include/asm/msr-index.h

@@ -61,10 +61,13 @@
 #define SPEC_CTRL_SSBD			BIT(SPEC_CTRL_SSBD_SHIFT)	/* Speculative Store Bypass Disable */
 #define SPEC_CTRL_RRSBA_DIS_S_SHIFT	6	/* Disable RRSBA behavior */
 #define SPEC_CTRL_RRSBA_DIS_S		BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT	10	/* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S		BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
 
 /* A mask for bits which the kernel toggles when controlling mitigations */
 #define SPEC_CTRL_MITIGATIONS_MASK	(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
-							| SPEC_CTRL_RRSBA_DIS_S)
+							| SPEC_CTRL_RRSBA_DIS_S \
+							| SPEC_CTRL_BHI_DIS_S)
 
 #define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB			BIT(0)	/* Indirect Branch Prediction Barrier */
@@ -163,6 +166,10 @@
 						 * are restricted to targets in
 						 * kernel.
 						 */
+#define ARCH_CAP_BHI_NO			BIT(20)	/*
+						 * CPU is not affected by Branch
+						 * History Injection.
+						 */
 #define ARCH_CAP_PBRSB_NO		BIT(24)	/*
 						 * Not susceptible to Post-Barrier
 						 * Return Stack Buffer Predictions.
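
For illustration (not part of the commit), the new ARCH_CAP_BHI_NO bit can be
probed from userspace through the msr driver (modprobe msr, root required); a
minimal sketch, assuming IA32_ARCH_CAPABILITIES (MSR 0x10a) is readable on
CPU 0:

    /* Illustrative only: test ARCH_CAP_BHI_NO (bit 20) of MSR 0x10a.
     * The msr driver uses the file offset as the MSR index. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        uint64_t cap = 0;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0 || pread(fd, &cap, sizeof(cap), 0x10a) != sizeof(cap)) {
            perror("rdmsr 0x10a");
            return 1;
        }
        close(fd);
        printf("BHI_NO: %s\n", (cap & (1ULL << 20)) ? "set" : "clear");
        return 0;
    }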

arch/x86/include/asm/nospec-branch.h

@@ -326,6 +326,19 @@
 	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
 .endm
 
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE \
@@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
 extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
 
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
 extern void (*x86_return_thunk)(void);
 
 extern void __warn_thunk(void);

arch/x86/include/asm/syscall.h

@@ -16,19 +16,17 @@
 #include <asm/thread_info.h>	/* for TS_COMPAT */
 #include <asm/unistd.h>
 
+/* This is used purely for kernel/trace/trace_syscalls.c */
 typedef long (*sys_call_ptr_t)(const struct pt_regs *);
 extern const sys_call_ptr_t sys_call_table[];
 
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
 /*
  * These may not exist, but still put the prototypes in so we
  * can use IS_ENABLED().
  */
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
 
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 }
 
 bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
 
 #endif	/* CONFIG_X86_32 */

arch/x86/kernel/cpu/bugs.c

@@ -1607,6 +1607,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
 	dump_stack();
 }
 
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+		return false;
+
+	x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+	update_spec_ctrl(x86_spec_ctrl_base);
+	setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+	return true;
+}
+
+enum bhi_mitigations {
+	BHI_MITIGATION_OFF,
+	BHI_MITIGATION_ON,
+	BHI_MITIGATION_AUTO,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+	IS_ENABLED(CONFIG_SPECTRE_BHI_ON)  ? BHI_MITIGATION_ON  :
+	IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF :
+					     BHI_MITIGATION_AUTO;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "off"))
+		bhi_mitigation = BHI_MITIGATION_OFF;
+	else if (!strcmp(str, "on"))
+		bhi_mitigation = BHI_MITIGATION_ON;
+	else if (!strcmp(str, "auto"))
+		bhi_mitigation = BHI_MITIGATION_AUTO;
+	else
+		pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+	return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+	if (bhi_mitigation == BHI_MITIGATION_OFF)
+		return;
+
+	/* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
+	    !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
+		return;
+
+	if (spec_ctrl_bhi_dis())
+		return;
+
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return;
+
+	/* Mitigate KVM by default */
+	setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+	pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+	if (bhi_mitigation == BHI_MITIGATION_AUTO)
+		return;
+
+	/* Mitigate syscalls when the mitigation is forced =on */
+	setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+	pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1791,9 @@ static void __init spectre_v2_select_mitigation(void)
 	    mode == SPECTRE_V2_RETPOLINE)
 		spec_ctrl_disable_kernel_rrsba();
 
+	if (boot_cpu_has(X86_BUG_BHI))
+		bhi_select_mitigation();
+
 	spectre_v2_enabled = mode;
 	pr_info("%s\n", spectre_v2_strings[mode]);
 
@@ -2695,15 +2771,15 @@ static char *stibp_state(void)
 	switch (spectre_v2_user_stibp) {
 	case SPECTRE_V2_USER_NONE:
-		return ", STIBP: disabled";
+		return "; STIBP: disabled";
 	case SPECTRE_V2_USER_STRICT:
-		return ", STIBP: forced";
+		return "; STIBP: forced";
 	case SPECTRE_V2_USER_STRICT_PREFERRED:
-		return ", STIBP: always-on";
+		return "; STIBP: always-on";
 	case SPECTRE_V2_USER_PRCTL:
 	case SPECTRE_V2_USER_SECCOMP:
 		if (static_key_enabled(&switch_to_cond_stibp))
-			return ", STIBP: conditional";
+			return "; STIBP: conditional";
 	}
 	return "";
 }
@@ -2712,10 +2788,10 @@ static char *ibpb_state(void)
 {
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
 		if (static_key_enabled(&switch_mm_always_ibpb))
-			return ", IBPB: always-on";
+			return "; IBPB: always-on";
 		if (static_key_enabled(&switch_mm_cond_ibpb))
-			return ", IBPB: conditional";
-		return ", IBPB: disabled";
+			return "; IBPB: conditional";
+		return "; IBPB: disabled";
 	}
 	return "";
 }
@@ -2725,14 +2801,31 @@ static char *pbrsb_eibrs_state(void)
 	if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
 		if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
 		    boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
-			return ", PBRSB-eIBRS: SW sequence";
+			return "; PBRSB-eIBRS: SW sequence";
 		else
-			return ", PBRSB-eIBRS: Vulnerable";
+			return "; PBRSB-eIBRS: Vulnerable";
 	} else {
-		return ", PBRSB-eIBRS: Not affected";
+		return "; PBRSB-eIBRS: Not affected";
 	}
 }
 
+static const char *spectre_bhi_state(void)
+{
+	if (!boot_cpu_has_bug(X86_BUG_BHI))
+		return "; BHI: Not affected";
+	else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+		return "; BHI: BHI_DIS_S";
+	else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+		return "; BHI: SW loop, KVM: SW loop";
+	else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+		 !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
+		return "; BHI: Retpoline";
+	else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+		return "; BHI: Syscall hardening, KVM: SW loop";
+
+	return "; BHI: Vulnerable (Syscall hardening enabled)";
+}
+
 static ssize_t spectre_v2_show_state(char *buf)
 {
 	if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2838,15 @@ static ssize_t spectre_v2_show_state(char *buf)
 	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
 		return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
 
-	return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+	return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
 			  spectre_v2_strings[spectre_v2_enabled],
 			  ibpb_state(),
-			  boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+			  boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
 			  stibp_state(),
-			  boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+			  boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
 			  pbrsb_eibrs_state(),
+			  spectre_bhi_state(),
+			  /* this should always be at the end */
 			  spectre_v2_module_string());
 }

arch/x86/kernel/cpu/common.c

@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define NO_SPECTRE_V2		BIT(8)
 #define NO_MMIO			BIT(9)
 #define NO_EIBRS_PBRSB		BIT(10)
+#define NO_BHI			BIT(11)
 
 #define VULNWL(vendor, family, model, whitelist)	\
 	X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL_INTEL(ATOM_TREMONT_D,	NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
 	/* AMD Family 0xf - 0x12 */
-	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
 
 	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
-	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
-	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
 
 	/* Zhaoxin Family 7 */
-	VULNWL(CENTAUR,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
-	VULNWL(ZHAOXIN,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+	VULNWL(CENTAUR,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+	VULNWL(ZHAOXIN,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
 	{}
 };
@@ -1435,6 +1436,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 	if (vulnerable_to_rfds(ia32_cap))
 		setup_force_cpu_bug(X86_BUG_RFDS);
 
+	/* When virtualized, eIBRS could be hidden, assume vulnerable */
+	if (!(ia32_cap & ARCH_CAP_BHI_NO) &&
+	    !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+	    (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+	     boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+		setup_force_cpu_bug(X86_BUG_BHI);
+
 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
 		return;

arch/x86/kernel/cpu/scattered.c

@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
 	{ X86_FEATURE_INTEL_PPIN,	CPUID_EBX,  0, 0x00000007, 1 },
 	{ X86_FEATURE_RRSBA_CTRL,	CPUID_EDX,  2, 0x00000007, 2 },
+	{ X86_FEATURE_BHI_CTRL,		CPUID_EDX,  4, 0x00000007, 2 },
 	{ X86_FEATURE_CQM_LLC,		CPUID_EDX,  1, 0x0000000f, 0 },
 	{ X86_FEATURE_CQM_OCCUP_LLC,	CPUID_EDX,  0, 0x0000000f, 1 },
 	{ X86_FEATURE_CQM_MBM_TOTAL,	CPUID_EDX,  1, 0x0000000f, 1 },

arch/x86/kvm/reverse_cpuid.h

@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
 #define X86_FEATURE_IPRED_CTRL		KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
 #define KVM_X86_FEATURE_RRSBA_CTRL	KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
 #define X86_FEATURE_DDPD_U		KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL		KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL	KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
 #define X86_FEATURE_MCDT_NO		KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
 
 /* CPUID level 0x80000007 (EDX). */
@@ -128,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
 	KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
 	KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
 	KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+	KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
 	default:
 		return x86_feature;
 	}

arch/x86/kvm/vmx/vmenter.S

@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
 
 	call vmx_spec_ctrl_restore_host
 
+	CLEAR_BRANCH_HISTORY_VMEXIT
+
 	/* Put return value in AX */
 	mov %_ASM_BX, %_ASM_AX

arch/x86/kvm/x86.c

@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
 	 ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
 	 ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
 	 ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
-	 ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+	 ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
 
 static u64 kvm_get_arch_capabilities(void)
 {