1// SPDX-License-Identifier: GPL-2.0
2
3#define pr_fmt(fmt) "x86/split lock detection: " fmt
4
5#include <linux/semaphore.h>
6#include <linux/workqueue.h>
7#include <linux/delay.h>
8#include <linux/cpuhotplug.h>
9#include <asm/cpu_device_id.h>
10#include <asm/cmdline.h>
11#include <asm/traps.h>
12#include <asm/cpu.h>
13#include <asm/msr.h>
14
/*
 * Operating modes for split lock / bus lock detection, selected via the
 * "split_lock_detect=" command line option (see sld_state_setup()).
 */
enum split_lock_detect_state {
	sld_off = 0,	/* No detection: offending instructions just execute */
	sld_warn,	/* Warn, then temporarily disable detection on that CPU */
	sld_fatal,	/* Refuse to handle user faults; SIGBUS on bus locks */
	sld_ratelimit,	/* Throttle bus locks system-wide via #DB */
};
21
/*
 * Default to sld_off because most systems do not support split lock detection.
 * sld_state_setup() will switch this to sld_warn on systems that support
 * split lock/bus lock detect, unless there is a command line override.
 */
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;

/* Boot-time snapshot of MSR_TEST_CTRL, used as the base value by sld_update_msr(). */
static u64 msr_test_ctrl_cache __ro_after_init;

/*
 * With a name like MSR_TEST_CTL it should go without saying, but don't touch
 * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
 */
static bool cpu_model_supports_sld __ro_after_init;
36
/* Mapping of "split_lock_detect=" command line tokens to detection states. */
static const struct {
	const char *option;
	enum split_lock_detect_state state;
} sld_options[] __initconst = {
	{ "off",	sld_off },
	{ "warn",	sld_warn },
	{ "fatal",	sld_fatal },
	{ "ratelimit:",	sld_ratelimit },	/* takes a numeric suffix, see match_option() */
};

/* System-wide rate limiter for sld_ratelimit mode; set up in match_option(). */
static struct ratelimit_state bld_ratelimit;

/* kernel.split_lock_mitigate sysctl: non-zero enables the "misery" delays. */
static unsigned int sysctl_sld_mitigate = 1;

/* Serializes split_lock_warn() mitigation so only one core is slowed at a time. */
static DEFINE_SEMAPHORE(buslock_sem, 1);
51
#ifdef CONFIG_PROC_SYSCTL
/* /proc/sys/kernel/split_lock_mitigate: boolean (0/1), writable by root. */
static const struct ctl_table sld_sysctls[] = {
	{
		.procname	= "split_lock_mitigate",
		.data		= &sysctl_sld_mitigate,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ZERO,	/* clamp to [0, 1] */
		.extra2		= SYSCTL_ONE,
	},
};

/* Register the sysctl table above under "kernel". */
static int __init sld_mitigate_sysctl_init(void)
{
	register_sysctl_init("kernel", sld_sysctls);
	return 0;
}

late_initcall(sld_mitigate_sysctl_init);
#endif
73
74static inline bool match_option(const char *arg, int arglen, const char *opt)
75{
76 int len = strlen(opt), ratelimit;
77
78 if (strncmp(arg, opt, len))
79 return false;
80
81 /*
82 * Min ratelimit is 1 bus lock/sec.
83 * Max ratelimit is 1000 bus locks/sec.
84 */
85 if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
86 ratelimit > 0 && ratelimit <= 1000) {
87 ratelimit_state_init(rs: &bld_ratelimit, HZ, burst: ratelimit);
88 ratelimit_set_flags(rs: &bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
89 return true;
90 }
91
92 return len == arglen;
93}
94
95static bool split_lock_verify_msr(bool on)
96{
97 u64 ctrl, tmp;
98
99 if (rdmsrq_safe(MSR_TEST_CTRL, p: &ctrl))
100 return false;
101 if (on)
102 ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
103 else
104 ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
105 if (wrmsrq_safe(MSR_TEST_CTRL, val: ctrl))
106 return false;
107 rdmsrq(MSR_TEST_CTRL, tmp);
108 return ctrl == tmp;
109}
110
111static void __init sld_state_setup(void)
112{
113 enum split_lock_detect_state state = sld_warn;
114 char arg[20];
115 int i, ret;
116
117 if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
118 !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
119 return;
120
121 ret = cmdline_find_option(cmdline_ptr: boot_command_line, option: "split_lock_detect",
122 buffer: arg, bufsize: sizeof(arg));
123 if (ret >= 0) {
124 for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
125 if (match_option(arg, arglen: ret, opt: sld_options[i].option)) {
126 state = sld_options[i].state;
127 break;
128 }
129 }
130 }
131 sld_state = state;
132}
133
134static void __init __split_lock_setup(void)
135{
136 if (!split_lock_verify_msr(on: false)) {
137 pr_info("MSR access failed: Disabled\n");
138 return;
139 }
140
141 rdmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);
142
143 if (!split_lock_verify_msr(on: true)) {
144 pr_info("MSR access failed: Disabled\n");
145 return;
146 }
147
148 /* Restore the MSR to its cached value. */
149 wrmsrq(MSR_TEST_CTRL, val: msr_test_ctrl_cache);
150
151 setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
152}
153
154/*
155 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
156 * is not implemented as one thread could undo the setting of the other
157 * thread immediately after dropping the lock anyway.
158 */
159static void sld_update_msr(bool on)
160{
161 u64 test_ctrl_val = msr_test_ctrl_cache;
162
163 if (on)
164 test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
165
166 wrmsrq(MSR_TEST_CTRL, val: test_ctrl_val);
167}
168
169void split_lock_init(void)
170{
171 /*
172 * #DB for bus lock handles ratelimit and #AC for split lock is
173 * disabled.
174 */
175 if (sld_state == sld_ratelimit) {
176 split_lock_verify_msr(on: false);
177 return;
178 }
179
180 if (cpu_model_supports_sld)
181 split_lock_verify_msr(on: sld_state != sld_off);
182}
183
184static void __split_lock_reenable_unlock(struct work_struct *work)
185{
186 sld_update_msr(on: true);
187 up(sem: &buslock_sem);
188}
189
190static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
191
192static void __split_lock_reenable(struct work_struct *work)
193{
194 sld_update_msr(on: true);
195}
196/*
197 * In order for each CPU to schedule its delayed work independently of the
198 * others, delayed work struct must be per-CPU. This is not required when
199 * sysctl_sld_mitigate is enabled because of the semaphore that limits
200 * the number of simultaneously scheduled delayed works to 1.
201 */
202static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
203
204/*
205 * Per-CPU delayed_work can't be statically initialized properly because
206 * the struct address is unknown. Thus per-CPU delayed_work structures
207 * have to be initialized during kernel initialization and after calling
208 * setup_per_cpu_areas().
209 */
210static int __init setup_split_lock_delayed_work(void)
211{
212 unsigned int cpu;
213
214 for_each_possible_cpu(cpu) {
215 struct delayed_work *work = per_cpu_ptr(&sl_reenable, cpu);
216
217 INIT_DELAYED_WORK(work, __split_lock_reenable);
218 }
219
220 return 0;
221}
222pure_initcall(setup_split_lock_delayed_work);
223
224/*
225 * If a CPU goes offline with pending delayed work to re-enable split lock
226 * detection then the delayed work will be executed on some other CPU. That
227 * handles releasing the buslock_sem, but because it executes on a
228 * different CPU probably won't re-enable split lock detection. This is a
229 * problem on HT systems since the sibling CPU on the same core may then be
230 * left running with split lock detection disabled.
231 *
232 * Unconditionally re-enable detection here.
233 */
234static int splitlock_cpu_offline(unsigned int cpu)
235{
236 sld_update_msr(on: true);
237
238 return 0;
239}
240
241static void split_lock_warn(unsigned long ip)
242{
243 struct delayed_work *work;
244 int cpu;
245 unsigned int saved_sld_mitigate = READ_ONCE(sysctl_sld_mitigate);
246
247 if (!current->reported_split_lock)
248 pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
249 current->comm, current->pid, ip);
250 current->reported_split_lock = 1;
251
252 if (saved_sld_mitigate) {
253 /*
254 * misery factor #1:
255 * sleep 10ms before trying to execute split lock.
256 */
257 if (msleep_interruptible(msecs: 10) > 0)
258 return;
259 /*
260 * Misery factor #2:
261 * only allow one buslocked disabled core at a time.
262 */
263 if (down_interruptible(sem: &buslock_sem) == -EINTR)
264 return;
265 }
266
267 cpu = get_cpu();
268 work = saved_sld_mitigate ? &sl_reenable_unlock : per_cpu_ptr(&sl_reenable, cpu);
269 schedule_delayed_work_on(cpu, dwork: work, delay: 2);
270
271 /* Disable split lock detection on this CPU to make progress */
272 sld_update_msr(on: false);
273 put_cpu();
274}
275
276bool handle_guest_split_lock(unsigned long ip)
277{
278 if (sld_state == sld_warn) {
279 split_lock_warn(ip);
280 return true;
281 }
282
283 pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
284 current->comm, current->pid,
285 sld_state == sld_fatal ? "fatal" : "bogus", ip);
286
287 current->thread.error_code = 0;
288 current->thread.trap_nr = X86_TRAP_AC;
289 force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
290 return false;
291}
292EXPORT_SYMBOL_GPL(handle_guest_split_lock);
293
294void bus_lock_init(void)
295{
296 u64 val;
297
298 if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
299 return;
300
301 rdmsrq(MSR_IA32_DEBUGCTLMSR, val);
302
303 if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
304 (sld_state == sld_warn || sld_state == sld_fatal)) ||
305 sld_state == sld_off) {
306 /*
307 * Warn and fatal are handled by #AC for split lock if #AC for
308 * split lock is supported.
309 */
310 val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
311 } else {
312 val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
313 }
314
315 wrmsrq(MSR_IA32_DEBUGCTLMSR, val);
316}
317
318bool handle_user_split_lock(struct pt_regs *regs, long error_code)
319{
320 if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
321 return false;
322 split_lock_warn(ip: regs->ip);
323 return true;
324}
325
326void handle_bus_lock(struct pt_regs *regs)
327{
328 switch (sld_state) {
329 case sld_off:
330 break;
331 case sld_ratelimit:
332 /* Enforce no more than bld_ratelimit bus locks/sec. */
333 while (!__ratelimit(&bld_ratelimit))
334 msleep(msecs: 20);
335 /* Warn on the bus lock. */
336 fallthrough;
337 case sld_warn:
338 pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
339 current->comm, current->pid, regs->ip);
340 break;
341 case sld_fatal:
342 force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
343 break;
344 }
345}
346
/*
 * CPU models that are known to have the per-core split-lock detection
 * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
 */
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
	X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
	X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
	{}	/* terminator */
};
357
358static void __init split_lock_setup(struct cpuinfo_x86 *c)
359{
360 const struct x86_cpu_id *m;
361 u64 ia32_core_caps;
362
363 if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
364 return;
365
366 /* Check for CPUs that have support but do not enumerate it: */
367 m = x86_match_cpu(match: split_lock_cpu_ids);
368 if (m)
369 goto supported;
370
371 if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
372 return;
373
374 /*
375 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
376 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
377 * it have split lock detection.
378 */
379 rdmsrq(MSR_IA32_CORE_CAPS, ia32_core_caps);
380 if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
381 goto supported;
382
383 /* CPU is not in the model list and does not have the MSR bit: */
384 return;
385
386supported:
387 cpu_model_supports_sld = true;
388 __split_lock_setup();
389}
390
391static void sld_state_show(void)
392{
393 if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
394 !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
395 return;
396
397 switch (sld_state) {
398 case sld_off:
399 pr_info("disabled\n");
400 break;
401 case sld_warn:
402 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
403 pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
404 if (cpuhp_setup_state(state: CPUHP_AP_ONLINE_DYN,
405 name: "x86/splitlock", NULL, teardown: splitlock_cpu_offline) < 0)
406 pr_warn("No splitlock CPU offline handler\n");
407 } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
408 pr_info("#DB: warning on user-space bus_locks\n");
409 }
410 break;
411 case sld_fatal:
412 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
413 pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
414 } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
415 pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
416 boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
417 " from non-WB" : "");
418 }
419 break;
420 case sld_ratelimit:
421 if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
422 pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
423 break;
424 }
425}
426
/*
 * Top-level boot entry point: probe hardware support, parse the command
 * line into sld_state, then report the resulting mode.
 */
void __init sld_setup(struct cpuinfo_x86 *c)
{
	split_lock_setup(c);	/* hardware probe; may set cpu_model_supports_sld */
	sld_state_setup();	/* command line -> sld_state */
	sld_state_show();	/* log mode; register hotplug callback if warning */
}
433