// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/objtool.h>
#include <linux/pgtable.h>
#include <linux/static_call.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

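/*
 * Recover the original instruction bytes at @addr when @addr may point into
 * the body of a jump-optimized kprobe.  The original bytes are assembled in
 * @buf from kp->opcode and op->optinsn.copied_insn; returns the address of
 * @buf on success, @addr if no optimized probe covers it, or 0 on failure.
 */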
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < JMP32_INSN_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op is optimized or is queued for unoptimizing */
			if (list_empty(&op->list) || optprobe_queued_unopt(op))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe has been optimized, the original bytes may have been
	 * overwritten by the displacement of the jump instruction.  In that
	 * case they must be recovered from the op->optinsn.copied_insn buffer.
	 */
	if (copy_from_kernel_nofault(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
	}

	return (unsigned long)buf;
}

static void synthesize_clac(kprobe_opcode_t *addr)
{
	/*
	 * Can't be static_cpu_has() due to how objtool treats this feature bit.
	 * This isn't a fast path anyway.
	 */
	if (!boot_cpu_has(X86_FEATURE_SMAP))
		return;

	/* Replace the NOP3 with CLAC */
	addr[0] = 0x0f;
	addr[1] = 0x01;
	addr[2] = 0xca;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	*addr++ = 0x48;		/* REX.W prefix */
	*addr++ = 0xbf;		/* movabs $val, %rdi */
#else
	*addr++ = 0xb8;		/* movl $val, %eax */
#endif
	*(unsigned long *)addr = val;
}

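/*
 * The template below is copied to the head of each out-of-line buffer.  It
 * builds a struct pt_regs frame on the stack, passes the optimized_kprobe
 * (patched in at optprobe_template_val) and the regs pointer to
 * optimized_callback() (patched in at optprobe_template_call), then restores
 * the registers and flags before falling through to the copied instructions.
 */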
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			"       pushq $" __stringify(__KERNEL_DS) "\n"
			/* Save the 'sp - 8', this will be fixed later. */
			"	pushq %rsp\n"
			"	pushfq\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 20*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addq $16, %rsp\n"
			/* And pop flags register from 'regs->ss'. */
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %ss\n"
			/* Save the 'sp - 4', this will be fixed later. */
			"	pushl %esp\n"
			"	pushfl\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 16*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addl $8, %esp\n"
			/* And pop flags register from 'regs->ss'. */
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

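/*
 * Byte offsets of the patchable landmarks within the template, measured from
 * optprobe_template_entry.  They locate the CLAC NOP3, the "set arg1" NOPs,
 * the call to optimized_callback(), and the end of the template (where the
 * copied instructions start) in each out-of-line buffer.
 */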
#define TMPL_CLAC_IDX \
	((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Optimized kprobe callback function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Adjust stack pointer */
		regs->sp += sizeof(long);
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

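/*
 * Copy instructions starting at @src into @dest until at least
 * JMP32_INSN_SIZE bytes are covered, so the whole range clobbered by the
 * optimized jump can be executed out of line.  Every copied instruction must
 * be boostable (safely executable from the buffer), and the range must not
 * overlap ftrace, alternatives, jump-label or static-call patch sites.
 */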
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < JMP32_INSN_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1) ||
	    static_call_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/* Check whether insn is an indirect jump */
static int insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* indirect jump (ModRM reg 4 or 5) */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}

/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and registers setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < JMP32_INSN_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		int ret;

		if (search_exception_tables(addr))
			/*
			 * Since some fixup code jumps into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;

		ret = insn_decode_kernel(&insn, (void *)recovered_insn);
		if (ret < 0)
			return 0;
#ifdef CONFIG_KGDB
		/*
		 * If there is a dynamically installed kgdb sw breakpoint,
		 * this function should not be probed.
		 */
		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
		    kgdb_has_hit_break(addr))
			return 0;
#endif
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/*
		 * Check that no instruction jumps into the target, either
		 * indirectly or directly.
		 *
		 * The indirect case is present to handle code with jump
		 * tables. When the kernel uses retpolines, the check should in
		 * theory additionally look for jumps to indirect thunks.
		 * However, a kernel built with retpolines or IBT has jump
		 * tables disabled, so the check can be skipped altogether.
		 */
		if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) &&
		    !IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
		    insn_is_indirect_jump(&insn))
			return 0;
		if (insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
					 DISP32_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disarmed(p))
			return -EEXIST;
	}

	return 0;
}

/* Check the addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 kprobe_opcode_t *addr)
{
	return (op->kp.addr <= addr &&
		op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	u8 *slot = op->optinsn.insn;
	if (slot) {
		int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;

		/* Record the perf event before freeing the slot */
		if (dirty)
			perf_event_text_poke(slot, slot, len, NULL, 0);

		free_optinsn_slot(slot, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

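/*
 * Layout of the out-of-line detour buffer (op->optinsn.insn):
 *
 *   [ template copy       ]  TMPL_END_IDX bytes
 *   [ copied instructions ]  op->optinsn.size bytes (>= JMP32_INSN_SIZE)
 *   [ JMP back            ]  JMP32_INSN_SIZE bytes, to op->kp.addr + size
 */
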
/*
 * Copy the instructions that will be replaced by the optimized jump.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify that the address gap is within the 2GB range, because
	 * this uses a relative jump.
	 */
	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

	/* Copy arch-dep-instance from template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	synthesize_clac(buf + TMPL_CLAC_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += JMP32_INSN_SIZE;

	/*
	 * Note that len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is
	 * also used in __arch_remove_optimized_kprobe().
	 */

	/* We have to use text_poke() for the instruction buffer because it is RO */
	perf_event_text_poke(slot, NULL, 0, buf, len);
	text_poke(slot, buf, len);

	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}

/*
 * Replace breakpoints (INT3) with relative jumps (JMP.d32).
 * The caller must hold both kprobe_mutex and text_mutex.
 *
 * The caller will have installed a regular kprobe and after that issued
 * synchronize_rcu_tasks(); this ensures that the instruction(s) that live in
 * the 4 bytes after the INT3 are unused and can now be overwritten.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[JMP32_INSN_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + JMP32_INSN_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Backup instructions which will be replaced by jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
		       DISP32_SIZE);

		insn_buff[0] = JMP32_INSN_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

		smp_text_poke_single(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);

		list_del_init(&op->list);
	}
}

/*
 * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
 *
 * After that, we can restore the 4 bytes after the INT3 to undo what
 * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
 * unused once the INT3 lands.
 */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
	u8 old[JMP32_INSN_SIZE];
	u8 *addr = op->kp.addr;

	memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
	memcpy(new + INT3_INSN_SIZE,
	       op->optinsn.copied_insn,
	       JMP32_INSN_SIZE - INT3_INSN_SIZE);

	text_poke(addr, new, INT3_INSN_SIZE);
	smp_text_poke_sync_each_cpu();
	text_poke(addr + INT3_INSN_SIZE,
		  new + INT3_INSN_SIZE,
		  JMP32_INSN_SIZE - INT3_INSN_SIZE);
	smp_text_poke_sync_each_cpu();

	perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}

/*
 * Recover the original instructions and breakpoints from relative jumps.
 * The caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

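/*
 * If @p has been jump-optimized, divert execution from the regular kprobe
 * breakpoint path straight into the out-of-line buffer, skipping the
 * template prologue.  Returns 1 when the detour is taken, 0 to fall back to
 * the normal single-step path.
 */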
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);