x86 mitigations for the native BHI hardware vulnerabilty:
Branch History Injection (BHI) attacks may allow a malicious application to influence indirect branch prediction in kernel by poisoning the branch history. eIBRS isolates indirect branch targets in ring0. The BHB can still influence the choice of indirect branch predictor entry, and although branch predictor entries are isolated between modes when eIBRS is enabled, the BHB itself is not isolated between modes. Add mitigations against it either with the help of microcode or with software sequences for the affected CPUs. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmYUKPMTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYofT8EACJJix+GzGUcJjOvfWFZcxwziY152hO 5XSzHOZZL6oz5Yk/Rye/S9RVTN7aDjn1CEvI0cD/ULxaTP869sS9dDdUcHhEJ//5 6hjqWsWiKc1QmLjBy3Pcb97GZHQXM5a9D1f6jXnJD+0FMLbQHpzSEBit0H4tv/TC 75myGgYihvUbhN9/bL10M5fz+UADU42nChvPWDMr9ukljjCqa46tPTmKUIAW5TWj /xsyf+Nk+4kZpdaidKGhpof6KCV2rNeevvzUGN8Pv5y13iAmvlyplqTcQ6dlubnZ CuDX5Ji9spNF9WmhKpLgy5N+Ocb64oVHov98N2zw1sT1N8XOYcSM0fBj7SQIFURs L7T4jBZS+1c3ZGJPPFWIaGjV8w1ZMhelglwJxjY7ZgRD6fK3mwRx/ks54J8H4HjE FbirXaZLeKlscDIOKtnxxKoIGwpdGwLKQYi/wEw7F9NhCLSj9wMia+j3uYIUEEHr 6xEiYEtyjcV3ocxagH7eiHyrasOKG64vjx2h1XodusBA2Wrvgm/jXlchUu+wb6B4 LiiZJt+DmOdQ1h5j3r2rt3hw7+nWa7kyq34qfN6NSUCHiedp6q7BClueSaKiOCGk RoNibNiS+CqaxwGxj/RGuvajEJeEMCsLuCxzT3aeaDBsqscW6Ka/HkGA76Tpb5nJ E3JyjYE7AlG4rw== =W0W3 -----END PGP SIGNATURE----- Merge tag 'nativebhi' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 mitigations from Thomas Gleixner: "Mitigations for the native BHI hardware vulnerabilty: Branch History Injection (BHI) attacks may allow a malicious application to influence indirect branch prediction in kernel by poisoning the branch history. eIBRS isolates indirect branch targets in ring0. The BHB can still influence the choice of indirect branch predictor entry, and although branch predictor entries are isolated between modes when eIBRS is enabled, the BHB itself is not isolated between modes. Add mitigations against it either with the help of microcode or with software sequences for the affected CPUs" [ This also ends up enabling the full mitigation by default despite the system call hardening, because apparently there are other indirect calls that are still sufficiently reachable, and the 'auto' case just isn't hardened enough. We'll have some more inevitable tweaking in the future - Linus ] * tag 'nativebhi' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: KVM: x86: Add BHI_NO x86/bhi: Mitigate KVM by default x86/bhi: Add BHI mitigation knob x86/bhi: Enumerate Branch History Injection (BHI) bug x86/bhi: Define SPEC_CTRL_BHI_DIS_S x86/bhi: Add support for clearing branch history at syscall entry x86/syscall: Don't force use of indirect calls for system calls x86/bugs: Change commas to semicolons in 'spectre_v2' sysfs file
This commit is contained in:
commit
2bb69f5fc7
|
@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
|
|||
the BHB might be shared across privilege levels even in the presence of
|
||||
Enhanced IBRS.
|
||||
|
||||
Currently the only known real-world BHB attack vector is via
|
||||
unprivileged eBPF. Therefore, it's highly recommended to not enable
|
||||
unprivileged eBPF, especially when eIBRS is used (without retpolines).
|
||||
For a full mitigation against BHB attacks, it's recommended to use
|
||||
retpolines (or eIBRS combined with retpolines).
|
||||
Previously the only known real-world BHB attack vector was via unprivileged
|
||||
eBPF. Further research has found attacks that don't require unprivileged eBPF.
|
||||
For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
|
||||
use the BHB clearing sequence.
|
||||
|
||||
Attack scenarios
|
||||
----------------
|
||||
|
@ -430,6 +429,23 @@ The possible values in this file are:
|
|||
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
|
||||
=========================== =======================================================
|
||||
|
||||
- Branch History Injection (BHI) protection status:
|
||||
|
||||
.. list-table::
|
||||
|
||||
* - BHI: Not affected
|
||||
- System is not affected
|
||||
* - BHI: Retpoline
|
||||
- System is protected by retpoline
|
||||
* - BHI: BHI_DIS_S
|
||||
- System is protected by BHI_DIS_S
|
||||
* - BHI: SW loop; KVM SW loop
|
||||
- System is protected by software clearing sequence
|
||||
* - BHI: Syscall hardening
|
||||
- Syscalls are hardened against BHI
|
||||
* - BHI: Syscall hardening; KVM: SW loop
|
||||
- System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence
|
||||
|
||||
Full mitigation might require a microcode update from the CPU
|
||||
vendor. When the necessary microcode is not available, the kernel will
|
||||
report vulnerability.
|
||||
|
@ -484,7 +500,11 @@ Spectre variant 2
|
|||
|
||||
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
|
||||
boot, by setting the IBRS bit, and they're automatically protected against
|
||||
Spectre v2 variant attacks.
|
||||
some Spectre v2 variant attacks. The BHB can still influence the choice of
|
||||
indirect branch predictor entry, and although branch predictor entries are
|
||||
isolated between modes when eIBRS is enabled, the BHB itself is not isolated
|
||||
between modes. Systems which support BHI_DIS_S will set it to protect against
|
||||
BHI attacks.
|
||||
|
||||
On Intel's enhanced IBRS systems, this includes cross-thread branch target
|
||||
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
|
||||
|
@ -638,6 +658,22 @@ kernel command line.
|
|||
spectre_v2=off. Spectre variant 1 mitigations
|
||||
cannot be disabled.
|
||||
|
||||
spectre_bhi=
|
||||
|
||||
[X86] Control mitigation of Branch History Injection
|
||||
(BHI) vulnerability. Syscalls are hardened against BHI
|
||||
regardless of this setting. This setting affects the deployment
|
||||
of the HW BHI control and the SW BHB clearing sequence.
|
||||
|
||||
on
|
||||
unconditionally enable.
|
||||
off
|
||||
unconditionally disable.
|
||||
auto
|
||||
enable if hardware mitigation
|
||||
control(BHI_DIS_S) is available, otherwise
|
||||
enable alternate mitigation in KVM.
|
||||
|
||||
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
|
||||
|
||||
Mitigation selection guide
|
||||
|
|
|
@ -6063,6 +6063,18 @@
|
|||
sonypi.*= [HW] Sony Programmable I/O Control Device driver
|
||||
See Documentation/admin-guide/laptops/sonypi.rst
|
||||
|
||||
spectre_bhi= [X86] Control mitigation of Branch History Injection
|
||||
(BHI) vulnerability. Syscalls are hardened against BHI
|
||||
reglardless of this setting. This setting affects the
|
||||
deployment of the HW BHI control and the SW BHB
|
||||
clearing sequence.
|
||||
|
||||
on - unconditionally enable.
|
||||
off - unconditionally disable.
|
||||
auto - (default) enable hardware mitigation
|
||||
(BHI_DIS_S) if available, otherwise enable
|
||||
alternate mitigation in KVM.
|
||||
|
||||
spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
|
||||
(indirect branch speculation) vulnerability.
|
||||
The default operation protects the kernel from
|
||||
|
|
|
@ -2633,6 +2633,32 @@ config MITIGATION_RFDS
|
|||
stored in floating point, vector and integer registers.
|
||||
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
|
||||
|
||||
choice
|
||||
prompt "Clear branch history"
|
||||
depends on CPU_SUP_INTEL
|
||||
default SPECTRE_BHI_ON
|
||||
help
|
||||
Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
|
||||
where the branch history buffer is poisoned to speculatively steer
|
||||
indirect branches.
|
||||
See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
|
||||
|
||||
config SPECTRE_BHI_ON
|
||||
bool "on"
|
||||
help
|
||||
Equivalent to setting spectre_bhi=on command line parameter.
|
||||
config SPECTRE_BHI_OFF
|
||||
bool "off"
|
||||
help
|
||||
Equivalent to setting spectre_bhi=off command line parameter.
|
||||
config SPECTRE_BHI_AUTO
|
||||
bool "auto"
|
||||
depends on BROKEN
|
||||
help
|
||||
Equivalent to setting spectre_bhi=auto command line parameter.
|
||||
|
||||
endchoice
|
||||
|
||||
endif
|
||||
|
||||
config ARCH_HAS_ADD_PAGES
|
||||
|
|
|
@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
|
|||
|
||||
if (likely(unr < NR_syscalls)) {
|
||||
unr = array_index_nospec(unr, NR_syscalls);
|
||||
regs->ax = sys_call_table[unr](regs);
|
||||
regs->ax = x64_sys_call(regs, unr);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
|
|||
|
||||
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
|
||||
xnr = array_index_nospec(xnr, X32_NR_syscalls);
|
||||
regs->ax = x32_sys_call_table[xnr](regs);
|
||||
regs->ax = x32_sys_call(regs, xnr);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
|
|||
|
||||
if (likely(unr < IA32_NR_syscalls)) {
|
||||
unr = array_index_nospec(unr, IA32_NR_syscalls);
|
||||
regs->ax = ia32_sys_call_table[unr](regs);
|
||||
regs->ax = ia32_sys_call(regs, unr);
|
||||
} else if (nr != -1) {
|
||||
regs->ax = __ia32_sys_ni_syscall(regs);
|
||||
}
|
||||
|
@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
|
|||
}
|
||||
|
||||
/**
|
||||
* int80_emulation - 32-bit legacy syscall entry
|
||||
* do_int80_emulation - 32-bit legacy syscall C entry from asm
|
||||
*
|
||||
* This entry point can be used by 32-bit and 64-bit programs to perform
|
||||
* 32-bit system calls. Instances of INT $0x80 can be found inline in
|
||||
|
@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
|
|||
* eax: system call number
|
||||
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
|
||||
*/
|
||||
DEFINE_IDTENTRY_RAW(int80_emulation)
|
||||
__visible noinstr void do_int80_emulation(struct pt_regs *regs)
|
||||
{
|
||||
int nr;
|
||||
|
||||
|
|
|
@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
|
|||
/* clobbers %rax, make sure it is after saving the syscall nr */
|
||||
IBRS_ENTER
|
||||
UNTRAIN_RET
|
||||
CLEAR_BRANCH_HISTORY
|
||||
|
||||
call do_syscall_64 /* returns with IRQs disabled */
|
||||
|
||||
|
@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
|
|||
call make_task_dead
|
||||
SYM_CODE_END(rewind_stack_and_make_dead)
|
||||
.popsection
|
||||
|
||||
/*
|
||||
* This sequence executes branches in order to remove user branch information
|
||||
* from the branch history tracker in the Branch Predictor, therefore removing
|
||||
* user influence on subsequent BTB lookups.
|
||||
*
|
||||
* It should be used on parts prior to Alder Lake. Newer parts should use the
|
||||
* BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
|
||||
* virtualized on newer hardware the VMM should protect against BHI attacks by
|
||||
* setting BHI_DIS_S for the guests.
|
||||
*
|
||||
* CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
|
||||
* and not clearing the branch history. The call tree looks like:
|
||||
*
|
||||
* call 1
|
||||
* call 2
|
||||
* call 2
|
||||
* call 2
|
||||
* call 2
|
||||
* call 2
|
||||
* ret
|
||||
* ret
|
||||
* ret
|
||||
* ret
|
||||
* ret
|
||||
* ret
|
||||
*
|
||||
* This means that the stack is non-constant and ORC can't unwind it with %rsp
|
||||
* alone. Therefore we unconditionally set up the frame pointer, which allows
|
||||
* ORC to unwind properly.
|
||||
*
|
||||
* The alignment is for performance and not for safety, and may be safely
|
||||
* refactored in the future if needed.
|
||||
*/
|
||||
SYM_FUNC_START(clear_bhb_loop)
|
||||
push %rbp
|
||||
mov %rsp, %rbp
|
||||
movl $5, %ecx
|
||||
ANNOTATE_INTRA_FUNCTION_CALL
|
||||
call 1f
|
||||
jmp 5f
|
||||
.align 64, 0xcc
|
||||
ANNOTATE_INTRA_FUNCTION_CALL
|
||||
1: call 2f
|
||||
RET
|
||||
.align 64, 0xcc
|
||||
2: movl $5, %eax
|
||||
3: jmp 4f
|
||||
nop
|
||||
4: sub $1, %eax
|
||||
jnz 3b
|
||||
sub $1, %ecx
|
||||
jnz 1b
|
||||
RET
|
||||
5: lfence
|
||||
pop %rbp
|
||||
RET
|
||||
SYM_FUNC_END(clear_bhb_loop)
|
||||
EXPORT_SYMBOL_GPL(clear_bhb_loop)
|
||||
STACK_FRAME_NON_STANDARD(clear_bhb_loop)
|
||||
|
|
|
@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
|
|||
|
||||
IBRS_ENTER
|
||||
UNTRAIN_RET
|
||||
CLEAR_BRANCH_HISTORY
|
||||
|
||||
/*
|
||||
* SYSENTER doesn't filter flags, so we need to clear NT and AC
|
||||
|
@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
|
|||
|
||||
IBRS_ENTER
|
||||
UNTRAIN_RET
|
||||
CLEAR_BRANCH_HISTORY
|
||||
|
||||
movq %rsp, %rdi
|
||||
call do_fast_syscall_32
|
||||
|
@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
|
|||
ANNOTATE_NOENDBR
|
||||
int3
|
||||
SYM_CODE_END(entry_SYSCALL_compat)
|
||||
|
||||
/*
|
||||
* int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
|
||||
* point to C routines, however since this is a system call interface the branch
|
||||
* history needs to be scrubbed to protect against BHI attacks, and that
|
||||
* scrubbing needs to take place in assembly code prior to entering any C
|
||||
* routines.
|
||||
*/
|
||||
SYM_CODE_START(int80_emulation)
|
||||
ANNOTATE_NOENDBR
|
||||
UNWIND_HINT_FUNC
|
||||
CLEAR_BRANCH_HISTORY
|
||||
jmp do_int80_emulation
|
||||
SYM_CODE_END(int80_emulation)
|
||||
|
|
|
@ -18,8 +18,25 @@
|
|||
#include <asm/syscalls_32.h>
|
||||
#undef __SYSCALL
|
||||
|
||||
/*
|
||||
* The sys_call_table[] is no longer used for system calls, but
|
||||
* kernel/trace/trace_syscalls.c still wants to know the system
|
||||
* call address.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
#define __SYSCALL(nr, sym) __ia32_##sym,
|
||||
|
||||
__visible const sys_call_ptr_t ia32_sys_call_table[] = {
|
||||
const sys_call_ptr_t sys_call_table[] = {
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
#undef __SYSCALL
|
||||
#endif
|
||||
|
||||
#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
|
||||
|
||||
long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
#include <asm/syscalls_32.h>
|
||||
default: return __ia32_sys_ni_syscall(regs);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -11,8 +11,23 @@
|
|||
#include <asm/syscalls_64.h>
|
||||
#undef __SYSCALL
|
||||
|
||||
/*
|
||||
* The sys_call_table[] is no longer used for system calls, but
|
||||
* kernel/trace/trace_syscalls.c still wants to know the system
|
||||
* call address.
|
||||
*/
|
||||
#define __SYSCALL(nr, sym) __x64_##sym,
|
||||
|
||||
asmlinkage const sys_call_ptr_t sys_call_table[] = {
|
||||
const sys_call_ptr_t sys_call_table[] = {
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
#undef __SYSCALL
|
||||
|
||||
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
|
||||
|
||||
long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
#include <asm/syscalls_64.h>
|
||||
default: return __x64_sys_ni_syscall(regs);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -11,8 +11,12 @@
|
|||
#include <asm/syscalls_x32.h>
|
||||
#undef __SYSCALL
|
||||
|
||||
#define __SYSCALL(nr, sym) __x64_##sym,
|
||||
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
|
||||
|
||||
asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
|
||||
#include <asm/syscalls_x32.h>
|
||||
long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
#include <asm/syscalls_x32.h>
|
||||
default: return __x64_sys_ni_syscall(regs);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -461,11 +461,15 @@
|
|||
|
||||
/*
|
||||
* Extended auxiliary flags: Linux defined - for features scattered in various
|
||||
* CPUID levels like 0x80000022, etc.
|
||||
* CPUID levels like 0x80000022, etc and Linux defined features.
|
||||
*
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
|
||||
#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
|
||||
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
|
||||
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
|
||||
#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
|
@ -515,4 +519,5 @@
|
|||
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
|
||||
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
|
||||
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
|
||||
#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
|
|
@ -61,10 +61,13 @@
|
|||
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
|
||||
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
|
||||
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
|
||||
#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
|
||||
#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
|
||||
|
||||
/* A mask for bits which the kernel toggles when controlling mitigations */
|
||||
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
|
||||
| SPEC_CTRL_RRSBA_DIS_S)
|
||||
| SPEC_CTRL_RRSBA_DIS_S \
|
||||
| SPEC_CTRL_BHI_DIS_S)
|
||||
|
||||
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
|
||||
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
|
||||
|
@ -163,6 +166,10 @@
|
|||
* are restricted to targets in
|
||||
* kernel.
|
||||
*/
|
||||
#define ARCH_CAP_BHI_NO BIT(20) /*
|
||||
* CPU is not affected by Branch
|
||||
* History Injection.
|
||||
*/
|
||||
#define ARCH_CAP_PBRSB_NO BIT(24) /*
|
||||
* Not susceptible to Post-Barrier
|
||||
* Return Stack Buffer Predictions.
|
||||
|
|
|
@ -326,6 +326,19 @@
|
|||
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
.macro CLEAR_BRANCH_HISTORY
|
||||
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
|
||||
.endm
|
||||
|
||||
.macro CLEAR_BRANCH_HISTORY_VMEXIT
|
||||
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
|
||||
.endm
|
||||
#else
|
||||
#define CLEAR_BRANCH_HISTORY
|
||||
#define CLEAR_BRANCH_HISTORY_VMEXIT
|
||||
#endif
|
||||
|
||||
#else /* __ASSEMBLY__ */
|
||||
|
||||
#define ANNOTATE_RETPOLINE_SAFE \
|
||||
|
@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
|
|||
extern void entry_untrain_ret(void);
|
||||
extern void entry_ibpb(void);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
extern void clear_bhb_loop(void);
|
||||
#endif
|
||||
|
||||
extern void (*x86_return_thunk)(void);
|
||||
|
||||
extern void __warn_thunk(void);
|
||||
|
|
|
@ -16,19 +16,17 @@
|
|||
#include <asm/thread_info.h> /* for TS_COMPAT */
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/* This is used purely for kernel/trace/trace_syscalls.c */
|
||||
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
|
||||
extern const sys_call_ptr_t sys_call_table[];
|
||||
|
||||
#if defined(CONFIG_X86_32)
|
||||
#define ia32_sys_call_table sys_call_table
|
||||
#else
|
||||
/*
|
||||
* These may not exist, but still put the prototypes in so we
|
||||
* can use IS_ENABLED().
|
||||
*/
|
||||
extern const sys_call_ptr_t ia32_sys_call_table[];
|
||||
extern const sys_call_ptr_t x32_sys_call_table[];
|
||||
#endif
|
||||
extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
|
||||
extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
|
||||
extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
|
||||
|
||||
/*
|
||||
* Only the low 32 bits of orig_ax are meaningful, so we return int.
|
||||
|
@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
|
|||
}
|
||||
|
||||
bool do_syscall_64(struct pt_regs *regs, int nr);
|
||||
void do_int80_emulation(struct pt_regs *regs);
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
|
|
|
@ -1607,6 +1607,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
|
|||
dump_stack();
|
||||
}
|
||||
|
||||
/*
|
||||
* Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
|
||||
* branch history in userspace. Not needed if BHI_NO is set.
|
||||
*/
|
||||
static bool __init spec_ctrl_bhi_dis(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
|
||||
return false;
|
||||
|
||||
x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
|
||||
update_spec_ctrl(x86_spec_ctrl_base);
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
enum bhi_mitigations {
|
||||
BHI_MITIGATION_OFF,
|
||||
BHI_MITIGATION_ON,
|
||||
BHI_MITIGATION_AUTO,
|
||||
};
|
||||
|
||||
static enum bhi_mitigations bhi_mitigation __ro_after_init =
|
||||
IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON :
|
||||
IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF :
|
||||
BHI_MITIGATION_AUTO;
|
||||
|
||||
static int __init spectre_bhi_parse_cmdline(char *str)
|
||||
{
|
||||
if (!str)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(str, "off"))
|
||||
bhi_mitigation = BHI_MITIGATION_OFF;
|
||||
else if (!strcmp(str, "on"))
|
||||
bhi_mitigation = BHI_MITIGATION_ON;
|
||||
else if (!strcmp(str, "auto"))
|
||||
bhi_mitigation = BHI_MITIGATION_AUTO;
|
||||
else
|
||||
pr_err("Ignoring unknown spectre_bhi option (%s)", str);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("spectre_bhi", spectre_bhi_parse_cmdline);
|
||||
|
||||
static void __init bhi_select_mitigation(void)
|
||||
{
|
||||
if (bhi_mitigation == BHI_MITIGATION_OFF)
|
||||
return;
|
||||
|
||||
/* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
|
||||
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
|
||||
!(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
|
||||
return;
|
||||
|
||||
if (spec_ctrl_bhi_dis())
|
||||
return;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_X86_64))
|
||||
return;
|
||||
|
||||
/* Mitigate KVM by default */
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
|
||||
pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
|
||||
|
||||
if (bhi_mitigation == BHI_MITIGATION_AUTO)
|
||||
return;
|
||||
|
||||
/* Mitigate syscalls when the mitigation is forced =on */
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
|
||||
pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
|
||||
}
|
||||
|
||||
static void __init spectre_v2_select_mitigation(void)
|
||||
{
|
||||
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
|
||||
|
@ -1718,6 +1791,9 @@ static void __init spectre_v2_select_mitigation(void)
|
|||
mode == SPECTRE_V2_RETPOLINE)
|
||||
spec_ctrl_disable_kernel_rrsba();
|
||||
|
||||
if (boot_cpu_has(X86_BUG_BHI))
|
||||
bhi_select_mitigation();
|
||||
|
||||
spectre_v2_enabled = mode;
|
||||
pr_info("%s\n", spectre_v2_strings[mode]);
|
||||
|
||||
|
@ -2695,15 +2771,15 @@ static char *stibp_state(void)
|
|||
|
||||
switch (spectre_v2_user_stibp) {
|
||||
case SPECTRE_V2_USER_NONE:
|
||||
return ", STIBP: disabled";
|
||||
return "; STIBP: disabled";
|
||||
case SPECTRE_V2_USER_STRICT:
|
||||
return ", STIBP: forced";
|
||||
return "; STIBP: forced";
|
||||
case SPECTRE_V2_USER_STRICT_PREFERRED:
|
||||
return ", STIBP: always-on";
|
||||
return "; STIBP: always-on";
|
||||
case SPECTRE_V2_USER_PRCTL:
|
||||
case SPECTRE_V2_USER_SECCOMP:
|
||||
if (static_key_enabled(&switch_to_cond_stibp))
|
||||
return ", STIBP: conditional";
|
||||
return "; STIBP: conditional";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
@ -2712,10 +2788,10 @@ static char *ibpb_state(void)
|
|||
{
|
||||
if (boot_cpu_has(X86_FEATURE_IBPB)) {
|
||||
if (static_key_enabled(&switch_mm_always_ibpb))
|
||||
return ", IBPB: always-on";
|
||||
return "; IBPB: always-on";
|
||||
if (static_key_enabled(&switch_mm_cond_ibpb))
|
||||
return ", IBPB: conditional";
|
||||
return ", IBPB: disabled";
|
||||
return "; IBPB: conditional";
|
||||
return "; IBPB: disabled";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
@ -2725,14 +2801,31 @@ static char *pbrsb_eibrs_state(void)
|
|||
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
|
||||
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
|
||||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
|
||||
return ", PBRSB-eIBRS: SW sequence";
|
||||
return "; PBRSB-eIBRS: SW sequence";
|
||||
else
|
||||
return ", PBRSB-eIBRS: Vulnerable";
|
||||
return "; PBRSB-eIBRS: Vulnerable";
|
||||
} else {
|
||||
return ", PBRSB-eIBRS: Not affected";
|
||||
return "; PBRSB-eIBRS: Not affected";
|
||||
}
|
||||
}
|
||||
|
||||
static const char * const spectre_bhi_state(void)
|
||||
{
|
||||
if (!boot_cpu_has_bug(X86_BUG_BHI))
|
||||
return "; BHI: Not affected";
|
||||
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
|
||||
return "; BHI: BHI_DIS_S";
|
||||
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
|
||||
return "; BHI: SW loop, KVM: SW loop";
|
||||
else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
|
||||
!(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
|
||||
return "; BHI: Retpoline";
|
||||
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
|
||||
return "; BHI: Syscall hardening, KVM: SW loop";
|
||||
|
||||
return "; BHI: Vulnerable (Syscall hardening enabled)";
|
||||
}
|
||||
|
||||
static ssize_t spectre_v2_show_state(char *buf)
|
||||
{
|
||||
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
|
||||
|
@ -2745,13 +2838,15 @@ static ssize_t spectre_v2_show_state(char *buf)
|
|||
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
|
||||
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
|
||||
|
||||
return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
|
||||
return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
|
||||
spectre_v2_strings[spectre_v2_enabled],
|
||||
ibpb_state(),
|
||||
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
|
||||
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
|
||||
stibp_state(),
|
||||
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
|
||||
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
|
||||
pbrsb_eibrs_state(),
|
||||
spectre_bhi_state(),
|
||||
/* this should always be at the end */
|
||||
spectre_v2_module_string());
|
||||
}
|
||||
|
||||
|
|
|
@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
|
|||
#define NO_SPECTRE_V2 BIT(8)
|
||||
#define NO_MMIO BIT(9)
|
||||
#define NO_EIBRS_PBRSB BIT(10)
|
||||
#define NO_BHI BIT(11)
|
||||
|
||||
#define VULNWL(vendor, family, model, whitelist) \
|
||||
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
|
||||
|
@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
|
|||
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
|
||||
|
||||
/* AMD Family 0xf - 0x12 */
|
||||
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
|
||||
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
|
||||
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
|
||||
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
|
||||
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
|
||||
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
|
||||
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
|
||||
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
|
||||
|
||||
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
|
||||
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
|
||||
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
|
||||
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
|
||||
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
|
||||
|
||||
/* Zhaoxin Family 7 */
|
||||
VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
|
||||
VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
|
||||
VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
|
||||
VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
|
||||
{}
|
||||
};
|
||||
|
||||
|
@ -1435,6 +1436,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
|||
if (vulnerable_to_rfds(ia32_cap))
|
||||
setup_force_cpu_bug(X86_BUG_RFDS);
|
||||
|
||||
/* When virtualized, eIBRS could be hidden, assume vulnerable */
|
||||
if (!(ia32_cap & ARCH_CAP_BHI_NO) &&
|
||||
!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
|
||||
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
|
||||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
|
||||
setup_force_cpu_bug(X86_BUG_BHI);
|
||||
|
||||
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
|
||||
return;
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
|
|||
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
|
||||
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
|
||||
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
|
||||
{ X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
|
||||
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
|
||||
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
|
||||
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
|
||||
|
|
|
@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
|
|||
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
|
||||
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
|
||||
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
|
||||
#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
|
||||
#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
|
||||
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
|
||||
|
||||
/* CPUID level 0x80000007 (EDX). */
|
||||
|
@ -128,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
|
|||
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
|
||||
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
|
||||
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
|
||||
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
|
||||
default:
|
||||
return x86_feature;
|
||||
}
|
||||
|
|
|
@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
|
|||
|
||||
call vmx_spec_ctrl_restore_host
|
||||
|
||||
CLEAR_BRANCH_HISTORY_VMEXIT
|
||||
|
||||
/* Put return value in AX */
|
||||
mov %_ASM_BX, %_ASM_AX
|
||||
|
||||
|
|
|
@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
|
|||
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
|
||||
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
|
||||
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
|
||||
ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
|
||||
ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
|
||||
|
||||
static u64 kvm_get_arch_capabilities(void)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue