mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 01:09:38 -05:00
- Add a spectre_bhi=vmexit mitigation option aimed at cloud
  environments

- Remove duplicated Spectre cmdline option documentation

- Add separate macro definitions for syscall handlers which do not
  return in order to address objtool warnings

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmaVXXMACgkQEsHwGGHe
VUrd3A/9FFJZcpxdpWJikyEskb3CO1xthfM/6QvV5U3/Nldpz4aROEteqsMYc+xB
OcA/RkCc8mBBFuydZjNxlNwyMXkoab/rQJC/Dz7q1O61sho4RWk8yCh6xM1JRofF
WeKGCClz1KnsCc8FlVaHAEhp6gBMJiiqawjXBklfHhUqmbY7UZgcAyeM3uMIwAEG
qCS7opOSZVijJadoyvROf5na23hggUVO++qS4HYT66G3bI3MdEEWp06dUxXBD/Er
2zRAY6III4wuGTxe8L49ftsyW9RS7AKY2rUmhpffkeA8tLYBfXogYVSQYyR3S9Ou
gZg9Yeu64rjqZZUYpzRR+kATUpuSKO6nQBHxd+ICRIUbzSmXUNzvPTi5SWSWh2vC
HTLgFbGXxg8fLlpqCJ21oaU982w3eteOJ+wgf/AH3hBykFljck9EcaGsaQ5OfeDE
MA0XaDy2V4jypyxmLpRfRIWJWtNVTgza2Jl0Dg3X+UipAXtvCvJzW1ZJ0ksA+2P0
K1GeWy4tC51uFndeYpNC1eQ0cJjv1mfAugHcqgVdAhwMYUZdXchaPJHr/fcF7AEG
xjV7fnoGK6WKKUni+Tnmom3FzBVDztKAtZ4iYgwIWReRj9bKLhP2k779rMXkCftt
WtiencSCtVn+K/4acYBx0vbRKlDv769Lq64FZ8xNgGw6uRXjhhM=
=AP9P
-----END PGP SIGNATURE-----

Merge tag 'x86_bugs_for_v6.11_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cpu mitigation updates from Borislav Petkov:

 - Add a spectre_bhi=vmexit mitigation option aimed at cloud
   environments

 - Remove duplicated Spectre cmdline option documentation

 - Add separate macro definitions for syscall handlers which do not
   return in order to address objtool warnings

* tag 'x86_bugs_for_v6.11_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/bugs: Add 'spectre_bhi=vmexit' cmdline option
  x86/bugs: Remove duplicate Spectre cmdline option descriptions
  x86/syscall: Mark exit[_group] syscall handlers __noreturn
This commit is contained in:
commit
2439a5eaa7
12 changed files with 86 additions and 109 deletions
|
@ -592,85 +592,19 @@ Spectre variant 2
|
|||
Mitigation control on the kernel command line
|
||||
---------------------------------------------
|
||||
|
||||
Spectre variant 2 mitigation can be disabled or force enabled at the
|
||||
kernel command line.
|
||||
In general the kernel selects reasonable default mitigations for the
|
||||
current CPU.
|
||||
|
||||
nospectre_v1
|
||||
Spectre default mitigations can be disabled or changed at the kernel
|
||||
command line with the following options:
|
||||
|
||||
[X86,PPC] Disable mitigations for Spectre Variant 1
|
||||
(bounds check bypass). With this option data leaks are
|
||||
possible in the system.
|
||||
- nospectre_v1
|
||||
- nospectre_v2
|
||||
- spectre_v2={option}
|
||||
- spectre_v2_user={option}
|
||||
- spectre_bhi={option}
|
||||
|
||||
nospectre_v2
|
||||
|
||||
[X86] Disable all mitigations for the Spectre variant 2
|
||||
(indirect branch prediction) vulnerability. System may
|
||||
allow data leaks with this option, which is equivalent
|
||||
to spectre_v2=off.
|
||||
|
||||
|
||||
spectre_v2=
|
||||
|
||||
[X86] Control mitigation of Spectre variant 2
|
||||
(indirect branch speculation) vulnerability.
|
||||
The default operation protects the kernel from
|
||||
user space attacks.
|
||||
|
||||
on
|
||||
unconditionally enable, implies
|
||||
spectre_v2_user=on
|
||||
off
|
||||
unconditionally disable, implies
|
||||
spectre_v2_user=off
|
||||
auto
|
||||
kernel detects whether your CPU model is
|
||||
vulnerable
|
||||
|
||||
Selecting 'on' will, and 'auto' may, choose a
|
||||
mitigation method at run time according to the
|
||||
CPU, the available microcode, the setting of the
|
||||
CONFIG_MITIGATION_RETPOLINE configuration option,
|
||||
and the compiler with which the kernel was built.
|
||||
|
||||
Selecting 'on' will also enable the mitigation
|
||||
against user space to user space task attacks.
|
||||
|
||||
Selecting 'off' will disable both the kernel and
|
||||
the user space protections.
|
||||
|
||||
Specific mitigations can also be selected manually:
|
||||
|
||||
retpoline auto pick between generic,lfence
|
||||
retpoline,generic Retpolines
|
||||
retpoline,lfence LFENCE; indirect branch
|
||||
retpoline,amd alias for retpoline,lfence
|
||||
eibrs Enhanced/Auto IBRS
|
||||
eibrs,retpoline Enhanced/Auto IBRS + Retpolines
|
||||
eibrs,lfence Enhanced/Auto IBRS + LFENCE
|
||||
ibrs use IBRS to protect kernel
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
spectre_v2=auto.
|
||||
|
||||
In general the kernel by default selects
|
||||
reasonable mitigations for the current CPU. To
|
||||
disable Spectre variant 2 mitigations, boot with
|
||||
spectre_v2=off. Spectre variant 1 mitigations
|
||||
cannot be disabled.
|
||||
|
||||
spectre_bhi=
|
||||
|
||||
[X86] Control mitigation of Branch History Injection
|
||||
(BHI) vulnerability. This setting affects the deployment
|
||||
of the HW BHI control and the SW BHB clearing sequence.
|
||||
|
||||
on
|
||||
(default) Enable the HW or SW mitigation as
|
||||
needed.
|
||||
off
|
||||
Disable the mitigation.
|
||||
|
||||
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
|
||||
For more details on the available options, refer to Documentation/admin-guide/kernel-parameters.txt
|
||||
|
||||
Mitigation selection guide
|
||||
--------------------------
|
||||
|
|
|
@ -6125,9 +6125,15 @@
|
|||
deployment of the HW BHI control and the SW BHB
|
||||
clearing sequence.
|
||||
|
||||
on - (default) Enable the HW or SW mitigation
|
||||
as needed.
|
||||
off - Disable the mitigation.
|
||||
on - (default) Enable the HW or SW mitigation as
|
||||
needed. This protects the kernel from
|
||||
both syscalls and VMs.
|
||||
vmexit - On systems which don't have the HW mitigation
|
||||
available, enable the SW mitigation on vmexit
|
||||
ONLY. On such systems, the host kernel is
|
||||
protected from VM-originated BHI attacks, but
|
||||
may still be vulnerable to syscall attacks.
|
||||
off - Disable the mitigation.
|
||||
|
||||
spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
|
||||
(indirect branch speculation) vulnerability.
|
||||
|
|
|
@ -14,9 +14,12 @@
|
|||
#endif
|
||||
|
||||
#define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
|
||||
|
||||
#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __ia32_##sym(const struct pt_regs *);
|
||||
#include <asm/syscalls_32.h>
|
||||
#undef __SYSCALL
|
||||
#undef __SYSCALL
|
||||
|
||||
#undef __SYSCALL_NORETURN
|
||||
#define __SYSCALL_NORETURN __SYSCALL
|
||||
|
||||
/*
|
||||
* The sys_call_table[] is no longer used for system calls, but
|
||||
|
@ -28,11 +31,10 @@
|
|||
const sys_call_ptr_t sys_call_table[] = {
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
#undef __SYSCALL
|
||||
#undef __SYSCALL
|
||||
#endif
|
||||
|
||||
#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
|
||||
|
||||
long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
|
|
|
@ -8,8 +8,12 @@
|
|||
#include <asm/syscall.h>
|
||||
|
||||
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
|
||||
#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
|
||||
#include <asm/syscalls_64.h>
|
||||
#undef __SYSCALL
|
||||
#undef __SYSCALL
|
||||
|
||||
#undef __SYSCALL_NORETURN
|
||||
#define __SYSCALL_NORETURN __SYSCALL
|
||||
|
||||
/*
|
||||
* The sys_call_table[] is no longer used for system calls, but
|
||||
|
@ -20,10 +24,9 @@
|
|||
const sys_call_ptr_t sys_call_table[] = {
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
#undef __SYSCALL
|
||||
#undef __SYSCALL
|
||||
|
||||
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
|
||||
|
||||
long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
|
|
|
@ -8,11 +8,14 @@
|
|||
#include <asm/syscall.h>
|
||||
|
||||
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
|
||||
#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
|
||||
#include <asm/syscalls_x32.h>
|
||||
#undef __SYSCALL
|
||||
#undef __SYSCALL
|
||||
|
||||
#undef __SYSCALL_NORETURN
|
||||
#define __SYSCALL_NORETURN __SYSCALL
|
||||
|
||||
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
|
||||
|
||||
long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
|
||||
{
|
||||
switch (nr) {
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# 32-bit system call numbers and entry vectors
|
||||
#
|
||||
# The format is:
|
||||
# <number> <abi> <name> <entry point> <compat entry point>
|
||||
# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]]
|
||||
#
|
||||
# The __ia32_sys and __ia32_compat_sys stubs are created on-the-fly for
|
||||
# sys_*() system calls and compat_sys_*() compat system calls if
|
||||
|
@ -13,7 +13,7 @@
|
|||
# The abi is always "i386" for this file.
|
||||
#
|
||||
0 i386 restart_syscall sys_restart_syscall
|
||||
1 i386 exit sys_exit
|
||||
1 i386 exit sys_exit - noreturn
|
||||
2 i386 fork sys_fork
|
||||
3 i386 read sys_read
|
||||
4 i386 write sys_write
|
||||
|
@ -264,7 +264,7 @@
|
|||
249 i386 io_cancel sys_io_cancel
|
||||
250 i386 fadvise64 sys_ia32_fadvise64
|
||||
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
|
||||
252 i386 exit_group sys_exit_group
|
||||
252 i386 exit_group sys_exit_group - noreturn
|
||||
253 i386 lookup_dcookie
|
||||
254 i386 epoll_create sys_epoll_create
|
||||
255 i386 epoll_ctl sys_epoll_ctl
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# 64-bit system call numbers and entry vectors
|
||||
#
|
||||
# The format is:
|
||||
# <number> <abi> <name> <entry point>
|
||||
# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]]
|
||||
#
|
||||
# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
|
||||
#
|
||||
|
@ -69,7 +69,7 @@
|
|||
57 common fork sys_fork
|
||||
58 common vfork sys_vfork
|
||||
59 64 execve sys_execve
|
||||
60 common exit sys_exit
|
||||
60 common exit sys_exit - noreturn
|
||||
61 common wait4 sys_wait4
|
||||
62 common kill sys_kill
|
||||
63 common uname sys_newuname
|
||||
|
@ -240,7 +240,7 @@
|
|||
228 common clock_gettime sys_clock_gettime
|
||||
229 common clock_getres sys_clock_getres
|
||||
230 common clock_nanosleep sys_clock_nanosleep
|
||||
231 common exit_group sys_exit_group
|
||||
231 common exit_group sys_exit_group - noreturn
|
||||
232 common epoll_wait sys_epoll_wait
|
||||
233 common epoll_ctl sys_epoll_ctl
|
||||
234 common tgkill sys_tgkill
|
||||
|
|
|
@ -1625,6 +1625,7 @@ static bool __init spec_ctrl_bhi_dis(void)
|
|||
enum bhi_mitigations {
|
||||
BHI_MITIGATION_OFF,
|
||||
BHI_MITIGATION_ON,
|
||||
BHI_MITIGATION_VMEXIT_ONLY,
|
||||
};
|
||||
|
||||
static enum bhi_mitigations bhi_mitigation __ro_after_init =
|
||||
|
@ -1639,6 +1640,8 @@ static int __init spectre_bhi_parse_cmdline(char *str)
|
|||
bhi_mitigation = BHI_MITIGATION_OFF;
|
||||
else if (!strcmp(str, "on"))
|
||||
bhi_mitigation = BHI_MITIGATION_ON;
|
||||
else if (!strcmp(str, "vmexit"))
|
||||
bhi_mitigation = BHI_MITIGATION_VMEXIT_ONLY;
|
||||
else
|
||||
pr_err("Ignoring unknown spectre_bhi option (%s)", str);
|
||||
|
||||
|
@ -1659,19 +1662,22 @@ static void __init bhi_select_mitigation(void)
|
|||
return;
|
||||
}
|
||||
|
||||
/* Mitigate in hardware if supported */
|
||||
if (spec_ctrl_bhi_dis())
|
||||
return;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_X86_64))
|
||||
return;
|
||||
|
||||
/* Mitigate KVM by default */
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
|
||||
pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
|
||||
if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) {
|
||||
pr_info("Spectre BHI mitigation: SW BHB clearing on VM exit only\n");
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Mitigate syscalls when the mitigation is forced =on */
|
||||
pr_info("Spectre BHI mitigation: SW BHB clearing on syscall and VM exit\n");
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
|
||||
pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
|
||||
}
|
||||
|
||||
static void __init spectre_v2_select_mitigation(void)
|
||||
|
|
|
@ -9,6 +9,10 @@
|
|||
#include <linux/cache.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long,
|
||||
unsigned long, unsigned long,
|
||||
unsigned long, unsigned long);
|
||||
|
||||
/*
|
||||
* Below you can see, in terms of #define's, the differences between the x86-64
|
||||
* and the UML syscall table.
|
||||
|
@ -22,15 +26,13 @@
|
|||
#define sys_vm86 sys_ni_syscall
|
||||
|
||||
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
|
||||
#define __SYSCALL_NORETURN __SYSCALL
|
||||
|
||||
#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
#include <asm/syscalls_32.h>
|
||||
#undef __SYSCALL
|
||||
|
||||
#undef __SYSCALL
|
||||
#define __SYSCALL(nr, sym) sym,
|
||||
|
||||
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
|
||||
const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
|
|
|
@ -9,6 +9,10 @@
|
|||
#include <linux/cache.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long,
|
||||
unsigned long, unsigned long,
|
||||
unsigned long, unsigned long);
|
||||
|
||||
/*
|
||||
* Below you can see, in terms of #define's, the differences between the x86-64
|
||||
* and the UML syscall table.
|
||||
|
@ -18,14 +22,13 @@
|
|||
#define sys_iopl sys_ni_syscall
|
||||
#define sys_ioperm sys_ni_syscall
|
||||
|
||||
#define __SYSCALL_NORETURN __SYSCALL
|
||||
|
||||
#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
#include <asm/syscalls_64.h>
|
||||
#undef __SYSCALL
|
||||
|
||||
#undef __SYSCALL
|
||||
#define __SYSCALL(nr, sym) sym,
|
||||
|
||||
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
|
||||
const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
|
|
|
@ -54,7 +54,7 @@ nxt=0
|
|||
|
||||
grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | {
|
||||
|
||||
while read nr abi name native compat ; do
|
||||
while read nr abi name native compat noreturn; do
|
||||
|
||||
if [ $nxt -gt $nr ]; then
|
||||
echo "error: $infile: syscall table is not sorted or duplicates the same syscall number" >&2
|
||||
|
@ -66,7 +66,21 @@ grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | {
|
|||
nxt=$((nxt + 1))
|
||||
done
|
||||
|
||||
if [ -n "$compat" ]; then
|
||||
if [ "$compat" = "-" ]; then
|
||||
unset compat
|
||||
fi
|
||||
|
||||
if [ -n "$noreturn" ]; then
|
||||
if [ "$noreturn" != "noreturn" ]; then
|
||||
echo "error: $infile: invalid string \"$noreturn\" in 'noreturn' column"
|
||||
exit 1
|
||||
fi
|
||||
if [ -n "$compat" ]; then
|
||||
echo "__SYSCALL_COMPAT_NORETURN($nr, $native, $compat)"
|
||||
else
|
||||
echo "__SYSCALL_NORETURN($nr, $native)"
|
||||
fi
|
||||
elif [ -n "$compat" ]; then
|
||||
echo "__SYSCALL_WITH_COMPAT($nr, $native, $compat)"
|
||||
elif [ -n "$native" ]; then
|
||||
echo "__SYSCALL($nr, $native)"
|
||||
|
|
|
@ -7,12 +7,16 @@
|
|||
* Yes, this is unfortunate. A better solution is in the works.
|
||||
*/
|
||||
NORETURN(__fortify_panic)
|
||||
NORETURN(__ia32_sys_exit)
|
||||
NORETURN(__ia32_sys_exit_group)
|
||||
NORETURN(__kunit_abort)
|
||||
NORETURN(__module_put_and_kthread_exit)
|
||||
NORETURN(__reiserfs_panic)
|
||||
NORETURN(__stack_chk_fail)
|
||||
NORETURN(__tdx_hypercall_failed)
|
||||
NORETURN(__ubsan_handle_builtin_unreachable)
|
||||
NORETURN(__x64_sys_exit)
|
||||
NORETURN(__x64_sys_exit_group)
|
||||
NORETURN(arch_cpu_idle_dead)
|
||||
NORETURN(bch2_trans_in_restart_error)
|
||||
NORETURN(bch2_trans_restart_error)
|
||||
|
|
Loading…
Add table
Reference in a new issue