mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 08:35:19 -05:00
CSD lock commits for v6.7
This series adds a kernel boot parameter that causes the kernel to panic if one of the call_smp_function() APIs is stalled for more than the specified duration. This is useful in deployments in which a clean panic is preferable to an indefinite stall. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEbK7UrM+RBIrCoViJnr8S83LZ+4wFAmU9plITHHBhdWxtY2tA a2VybmVsLm9yZwAKCRCevxLzctn7jB91D/9OiOMtV03TXN2K+zGmJMjTFgLuVnug OqG4mrCv7jTJ3k6fTpu7hih/BCmG1Mu7byyPV6BUSfKsYony7L4yTPFJsjK8lNmq MHh847DErGieuURCDnsvqBVpYIRfXnvW9ptlf+BMCjbzz4FuUu1XhJTm+U2nab3i BEIEMORxDCyghh7yluAVG6sULRXOqjv5VcypwOXUbavDf0JyJTns4QlXFD85yiBr nvfHvLUrzu5EblA3m09lTnCaKrAlz5pwD7fWQq7bS3rz2gndR/DcVcZknQ68FMsj Mcf0Zf45TzyWWfMcE8LCulhMlZ2GYsIm2YkIbgwlsOAndjBrV55rsgaVbSZDke37 QMHPGUZ7m7AjuDqpWZITrJjQHkWCtn/5tFSazHSlMwWg44pgOqvc3OgZn04tzn01 L/guq3yBIKBJiAjdgzdx8/H9S7cSH8TLEWFY2utEAMIjef9Xzi6qwQ4X5p4K9QYJ Sm1hTTCbF9NeyMk0o2rokR58f13+9ewxE4RIYcwP9loo6nbBcTVN/fvdHN6jQywa 7UIui508AUf55zQgO+5z8LsmjyNiDmDIeE8CLeVDicllWN+ne7F//AXAuA5V8m+y G0Mphn/lZpnddDSFlR/QFuMvGqtpIu2y8w93awq8g/VzirbkiiCia3iFcFtss4ip n0Y50CCu/W70fw== =ZD0S -----END PGP SIGNATURE----- Merge tag 'csd-lock.2023.10.23a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu Pull CSD lock update from Paul McKenney: "This adds a kernel boot parameter that causes the kernel to panic if one of the call_smp_function() APIs is stalled for more than the specified duration. This is useful in deployments in which a clean panic is preferable to an indefinite stall" * tag 'csd-lock.2023.10.23a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: smp,csd: Throw an error if a CSD lock is stuck for too long
This commit is contained in:
commit
9a0f53e0cf
2 changed files with 19 additions and 1 deletions
|
@ -5864,6 +5864,13 @@
|
|||
This feature may be more efficiently disabled
|
||||
using the csdlock_debug- kernel parameter.
|
||||
|
||||
smp.panic_on_ipistall= [KNL]
|
||||
If a CSD-lock wait extends for more than
|
||||
the specified number of milliseconds, panic the
|
||||
system. By default, let CSD-lock acquisitions
|
||||
take as long as they take. Specifying 300000
|
||||
for this value provides a 5-minute timeout.
|
||||
|
||||
smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
|
||||
smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port
|
||||
smsc-ircc2.ircc_sir= [HW] SIR base I/O port
|
||||
|
|
13
kernel/smp.c
13
kernel/smp.c
|
@ -170,6 +170,8 @@ static DEFINE_PER_CPU(void *, cur_csd_info);
|
|||
|
||||
static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */
|
||||
module_param(csd_lock_timeout, ulong, 0444);
|
||||
static int panic_on_ipistall; /* CSD panic timeout in milliseconds, 300000 for five minutes. */
|
||||
module_param(panic_on_ipistall, int, 0444);
|
||||
|
||||
static atomic_t csd_bug_count = ATOMIC_INIT(0);
|
||||
|
||||
|
@ -230,6 +232,7 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
|
|||
}
|
||||
|
||||
ts2 = sched_clock();
|
||||
/* How long since we last checked for a stuck CSD lock. */
|
||||
ts_delta = ts2 - *ts1;
|
||||
if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
|
||||
return false;
|
||||
|
@ -243,9 +246,17 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
|
|||
else
|
||||
cpux = cpu;
|
||||
cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
|
||||
/* How long since this CSD lock was stuck. */
|
||||
ts_delta = ts2 - ts0;
|
||||
pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
|
||||
firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
|
||||
firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts_delta,
|
||||
cpu, csd->func, csd->info);
|
||||
/*
|
||||
* If the CSD lock is still stuck after the panic_on_ipistall timeout, it is unlikely
|
||||
* to become unstuck. Use a signed comparison to avoid triggering
|
||||
* on underflows when the TSC is out of sync between sockets.
|
||||
*/
|
||||
BUG_ON(panic_on_ipistall > 0 && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC));
|
||||
if (cpu_cur_csd && csd != cpu_cur_csd) {
|
||||
pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
|
||||
*bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
|
||||
|
|
Loading…
Reference in a new issue