| 1 | // SPDX-License-Identifier: GPL-2.0-or-later | 
|---|
| 2 | /* | 
|---|
| 3 | *  Kernel Probes (KProbes) | 
|---|
| 4 | * | 
|---|
| 5 | * Copyright (C) IBM Corporation, 2002, 2004 | 
|---|
| 6 | * | 
|---|
| 7 | * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel | 
|---|
| 8 | *		Probes initial implementation ( includes contributions from | 
|---|
| 9 | *		Rusty Russell). | 
|---|
| 10 | * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes | 
|---|
| 11 | *		interface to access function arguments. | 
|---|
| 12 | * 2004-Oct	Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | 
|---|
| 13 | *		<prasanna@in.ibm.com> adapted for x86_64 from i386. | 
|---|
| 14 | * 2005-Mar	Roland McGrath <roland@redhat.com> | 
|---|
| 15 | *		Fixed to handle %rip-relative addressing mode correctly. | 
|---|
| 16 | * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston | 
|---|
| 17 | *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi | 
|---|
| 18 | *		<prasanna@in.ibm.com> added function-return probes. | 
|---|
| 19 | * 2005-May	Rusty Lynch <rusty.lynch@intel.com> | 
|---|
| 20 | *		Added function return probes functionality | 
|---|
| 21 | * 2006-Feb	Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added | 
|---|
| 22 | *		kprobe-booster and kretprobe-booster for i386. | 
|---|
| 23 | * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster | 
|---|
| 24 | *		and kretprobe-booster for x86-64 | 
|---|
| 25 | * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven | 
|---|
| 26 | *		<arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> | 
|---|
| 27 | *		unified x86 kprobes code. | 
|---|
| 28 | */ | 
|---|
| 29 | #include <linux/kprobes.h> | 
|---|
| 30 | #include <linux/ptrace.h> | 
|---|
| 31 | #include <linux/string.h> | 
|---|
| 32 | #include <linux/slab.h> | 
|---|
| 33 | #include <linux/hardirq.h> | 
|---|
| 34 | #include <linux/preempt.h> | 
|---|
| 35 | #include <linux/sched/debug.h> | 
|---|
| 36 | #include <linux/perf_event.h> | 
|---|
| 37 | #include <linux/extable.h> | 
|---|
| 38 | #include <linux/kdebug.h> | 
|---|
| 39 | #include <linux/kallsyms.h> | 
|---|
| 40 | #include <linux/kgdb.h> | 
|---|
| 41 | #include <linux/ftrace.h> | 
|---|
| 42 | #include <linux/kasan.h> | 
|---|
| 43 | #include <linux/objtool.h> | 
|---|
| 44 | #include <linux/vmalloc.h> | 
|---|
| 45 | #include <linux/pgtable.h> | 
|---|
| 46 | #include <linux/set_memory.h> | 
|---|
| 47 | #include <linux/cfi.h> | 
|---|
| 48 | #include <linux/execmem.h> | 
|---|
| 49 |  | 
|---|
| 50 | #include <asm/text-patching.h> | 
|---|
| 51 | #include <asm/cacheflush.h> | 
|---|
| 52 | #include <asm/desc.h> | 
|---|
| 53 | #include <linux/uaccess.h> | 
|---|
| 54 | #include <asm/alternative.h> | 
|---|
| 55 | #include <asm/insn.h> | 
|---|
| 56 | #include <asm/debugreg.h> | 
|---|
| 57 | #include <asm/ibt.h> | 
|---|
| 58 |  | 
|---|
| 59 | #include "common.h" | 
|---|
| 60 |  | 
|---|
| 61 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 
|---|
| 62 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | 
|---|
| 63 |  | 
|---|
| 64 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | 
|---|
| 65 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \ | 
|---|
| 66 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \ | 
|---|
| 67 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \ | 
|---|
| 68 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \ | 
|---|
| 69 | << (row % 32)) | 
|---|
| 70 | /* | 
|---|
| 71 | * Undefined/reserved opcodes, conditional jump, Opcode Extension | 
|---|
| 72 | * Groups, and some special opcodes can not boost. | 
|---|
| 73 | * This is non-const and volatile to keep gcc from statically | 
|---|
| 74 | * optimizing it out, as variable_test_bit makes gcc think only | 
|---|
| 75 | * *(unsigned long*) is used. | 
|---|
| 76 | */ | 
|---|
| 77 | static volatile u32 twobyte_is_boostable[256 / 32] = { | 
|---|
| 78 | /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */ | 
|---|
| 79 | /*      ----------------------------------------------          */ | 
|---|
| 80 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ | 
|---|
| 81 | W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */ | 
|---|
| 82 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ | 
|---|
| 83 | W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | 
|---|
| 84 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | 
|---|
| 85 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | 
|---|
| 86 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */ | 
|---|
| 87 | W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | 
|---|
| 88 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */ | 
|---|
| 89 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | 
|---|
| 90 | W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */ | 
|---|
| 91 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */ | 
|---|
| 92 | W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | 
|---|
| 93 | W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */ | 
|---|
| 94 | W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */ | 
|---|
| 95 | W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0)   /* f0 */ | 
|---|
| 96 | /*      -----------------------------------------------         */ | 
|---|
| 97 | /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */ | 
|---|
| 98 | }; | 
|---|
| 99 | #undef W | 
|---|
| 100 |  | 
|---|
| 101 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | 
|---|
| 102 | { "__switch_to", }, /* This function switches only current task, but | 
|---|
| 103 | doesn't switch kernel stack.*/ | 
|---|
| 104 | {NULL, NULL}	/* Terminator */ | 
|---|
| 105 | }; | 
|---|
| 106 |  | 
|---|
| 107 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 
|---|
| 108 |  | 
|---|
| 109 | static nokprobe_inline void | 
|---|
| 110 | __synthesize_relative_insn(void *dest, void *from, void *to, u8 op) | 
|---|
| 111 | { | 
|---|
| 112 | struct __arch_relative_insn { | 
|---|
| 113 | u8 op; | 
|---|
| 114 | s32 raddr; | 
|---|
| 115 | } __packed *insn; | 
|---|
| 116 |  | 
|---|
| 117 | insn = (struct __arch_relative_insn *)dest; | 
|---|
| 118 | insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); | 
|---|
| 119 | insn->op = op; | 
|---|
| 120 | } | 
|---|
| 121 |  | 
|---|
| 122 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 
|---|
| 123 | void synthesize_reljump(void *dest, void *from, void *to) | 
|---|
| 124 | { | 
|---|
| 125 | __synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE); | 
|---|
| 126 | } | 
|---|
| 127 | NOKPROBE_SYMBOL(synthesize_reljump); | 
|---|
| 128 |  | 
|---|
| 129 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | 
|---|
| 130 | void synthesize_relcall(void *dest, void *from, void *to) | 
|---|
| 131 | { | 
|---|
| 132 | __synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE); | 
|---|
| 133 | } | 
|---|
| 134 | NOKPROBE_SYMBOL(synthesize_relcall); | 
|---|
| 135 |  | 
|---|
| 136 | /* | 
|---|
| 137 | * Returns non-zero if INSN is boostable. | 
|---|
| 138 | * RIP relative instructions are adjusted at copying time in 64 bits mode | 
|---|
| 139 | */ | 
|---|
| 140 | bool can_boost(struct insn *insn, void *addr) | 
|---|
| 141 | { | 
|---|
| 142 | kprobe_opcode_t opcode; | 
|---|
| 143 | insn_byte_t prefix; | 
|---|
| 144 | int i; | 
|---|
| 145 |  | 
|---|
| 146 | if (search_exception_tables(add: (unsigned long)addr)) | 
|---|
| 147 | return false;	/* Page fault may occur on this address. */ | 
|---|
| 148 |  | 
|---|
| 149 | /* 2nd-byte opcode */ | 
|---|
| 150 | if (insn->opcode.nbytes == 2) | 
|---|
| 151 | return test_bit(insn->opcode.bytes[1], | 
|---|
| 152 | (unsigned long *)twobyte_is_boostable); | 
|---|
| 153 |  | 
|---|
| 154 | if (insn->opcode.nbytes != 1) | 
|---|
| 155 | return false; | 
|---|
| 156 |  | 
|---|
| 157 | for_each_insn_prefix(insn, i, prefix) { | 
|---|
| 158 | insn_attr_t attr; | 
|---|
| 159 |  | 
|---|
| 160 | attr = inat_get_opcode_attribute(opcode: prefix); | 
|---|
| 161 | /* Can't boost Address-size override prefix and CS override prefix */ | 
|---|
| 162 | if (prefix == 0x2e || inat_is_address_size_prefix(attr)) | 
|---|
| 163 | return false; | 
|---|
| 164 | } | 
|---|
| 165 |  | 
|---|
| 166 | opcode = insn->opcode.bytes[0]; | 
|---|
| 167 |  | 
|---|
| 168 | switch (opcode) { | 
|---|
| 169 | case 0x62:		/* bound */ | 
|---|
| 170 | case 0x70 ... 0x7f:	/* Conditional jumps */ | 
|---|
| 171 | case 0x9a:		/* Call far */ | 
|---|
| 172 | case 0xcc ... 0xce:	/* software exceptions */ | 
|---|
| 173 | case 0xd6:		/* (UD) */ | 
|---|
| 174 | case 0xd8 ... 0xdf:	/* ESC */ | 
|---|
| 175 | case 0xe0 ... 0xe3:	/* LOOP*, JCXZ */ | 
|---|
| 176 | case 0xe8 ... 0xe9:	/* near Call, JMP */ | 
|---|
| 177 | case 0xeb:		/* Short JMP */ | 
|---|
| 178 | case 0xf0 ... 0xf4:	/* LOCK/REP, HLT */ | 
|---|
| 179 | /* ... are not boostable */ | 
|---|
| 180 | return false; | 
|---|
| 181 | case 0xc0 ... 0xc1:	/* Grp2 */ | 
|---|
| 182 | case 0xd0 ... 0xd3:	/* Grp2 */ | 
|---|
| 183 | /* | 
|---|
| 184 | * AMD uses nnn == 110 as SHL/SAL, but Intel makes it reserved. | 
|---|
| 185 | */ | 
|---|
| 186 | return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b110; | 
|---|
| 187 | case 0xf6 ... 0xf7:	/* Grp3 */ | 
|---|
| 188 | /* AMD uses nnn == 001 as TEST, but Intel makes it reserved. */ | 
|---|
| 189 | return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b001; | 
|---|
| 190 | case 0xfe:		/* Grp4 */ | 
|---|
| 191 | /* Only INC and DEC are boostable */ | 
|---|
| 192 | return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 || | 
|---|
| 193 | X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001; | 
|---|
| 194 | case 0xff:		/* Grp5 */ | 
|---|
| 195 | /* Only INC, DEC, and indirect JMP are boostable */ | 
|---|
| 196 | return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 || | 
|---|
| 197 | X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001 || | 
|---|
| 198 | X86_MODRM_REG(insn->modrm.bytes[0]) == 0b100; | 
|---|
| 199 | default: | 
|---|
| 200 | return true; | 
|---|
| 201 | } | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|
| 204 | static unsigned long | 
|---|
| 205 | __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) | 
|---|
| 206 | { | 
|---|
| 207 | struct kprobe *kp; | 
|---|
| 208 | bool faddr; | 
|---|
| 209 |  | 
|---|
| 210 | kp = get_kprobe(addr: (void *)addr); | 
|---|
| 211 | faddr = ftrace_location(ip: addr) == addr; | 
|---|
| 212 | /* | 
|---|
| 213 | * Use the current code if it is not modified by Kprobe | 
|---|
| 214 | * and it cannot be modified by ftrace. | 
|---|
| 215 | */ | 
|---|
| 216 | if (!kp && !faddr) | 
|---|
| 217 | return addr; | 
|---|
| 218 |  | 
|---|
| 219 | /* | 
|---|
| 220 | * Basically, kp->ainsn.insn has an original instruction. | 
|---|
| 221 | * However, RIP-relative instruction can not do single-stepping | 
|---|
| 222 | * at different place, __copy_instruction() tweaks the displacement of | 
|---|
| 223 | * that instruction. In that case, we can't recover the instruction | 
|---|
| 224 | * from the kp->ainsn.insn. | 
|---|
| 225 | * | 
|---|
| 226 | * On the other hand, in case on normal Kprobe, kp->opcode has a copy | 
|---|
| 227 | * of the first byte of the probed instruction, which is overwritten | 
|---|
| 228 | * by int3. And the instruction at kp->addr is not modified by kprobes | 
|---|
| 229 | * except for the first byte, we can recover the original instruction | 
|---|
| 230 | * from it and kp->opcode. | 
|---|
| 231 | * | 
|---|
| 232 | * In case of Kprobes using ftrace, we do not have a copy of | 
|---|
| 233 | * the original instruction. In fact, the ftrace location might | 
|---|
| 234 | * be modified at anytime and even could be in an inconsistent state. | 
|---|
| 235 | * Fortunately, we know that the original code is the ideal 5-byte | 
|---|
| 236 | * long NOP. | 
|---|
| 237 | */ | 
|---|
| 238 | if (copy_from_kernel_nofault(dst: buf, src: (void *)addr, | 
|---|
| 239 | MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) | 
|---|
| 240 | return 0UL; | 
|---|
| 241 |  | 
|---|
| 242 | if (faddr) | 
|---|
| 243 | memcpy(to: buf, from: x86_nops[5], len: 5); | 
|---|
| 244 | else | 
|---|
| 245 | buf[0] = kp->opcode; | 
|---|
| 246 | return (unsigned long)buf; | 
|---|
| 247 | } | 
|---|
| 248 |  | 
|---|
| 249 | /* | 
|---|
| 250 | * Recover the probed instruction at addr for further analysis. | 
|---|
| 251 | * Caller must lock kprobes by kprobe_mutex, or disable preemption | 
|---|
| 252 | * for preventing to release referencing kprobes. | 
|---|
| 253 | * Returns zero if the instruction can not get recovered (or access failed). | 
|---|
| 254 | */ | 
|---|
| 255 | unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | 
|---|
| 256 | { | 
|---|
| 257 | unsigned long __addr; | 
|---|
| 258 |  | 
|---|
| 259 | __addr = __recover_optprobed_insn(buf, addr); | 
|---|
| 260 | if (__addr != addr) | 
|---|
| 261 | return __addr; | 
|---|
| 262 |  | 
|---|
| 263 | return __recover_probed_insn(buf, addr); | 
|---|
| 264 | } | 
|---|
| 265 |  | 
|---|
| 266 | /* Check if insn is INT or UD */ | 
|---|
| 267 | static inline bool is_exception_insn(struct insn *insn) | 
|---|
| 268 | { | 
|---|
| 269 | /* UD uses 0f escape */ | 
|---|
| 270 | if (insn->opcode.bytes[0] == 0x0f) { | 
|---|
| 271 | /* UD0 / UD1 / UD2 */ | 
|---|
| 272 | return insn->opcode.bytes[1] == 0xff || | 
|---|
| 273 | insn->opcode.bytes[1] == 0xb9 || | 
|---|
| 274 | insn->opcode.bytes[1] == 0x0b; | 
|---|
| 275 | } | 
|---|
| 276 |  | 
|---|
| 277 | /* INT3 / INT n / INTO / INT1 */ | 
|---|
| 278 | return insn->opcode.bytes[0] == 0xcc || | 
|---|
| 279 | insn->opcode.bytes[0] == 0xcd || | 
|---|
| 280 | insn->opcode.bytes[0] == 0xce || | 
|---|
| 281 | insn->opcode.bytes[0] == 0xf1; | 
|---|
| 282 | } | 
|---|
| 283 |  | 
|---|
| 284 | /* | 
|---|
| 285 | * Check if paddr is at an instruction boundary and that instruction can | 
|---|
| 286 | * be probed | 
|---|
| 287 | */ | 
|---|
| 288 | static bool can_probe(unsigned long paddr) | 
|---|
| 289 | { | 
|---|
| 290 | unsigned long addr, __addr, offset = 0; | 
|---|
| 291 | struct insn insn; | 
|---|
| 292 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 
|---|
| 293 |  | 
|---|
| 294 | if (!kallsyms_lookup_size_offset(addr: paddr, NULL, offset: &offset)) | 
|---|
| 295 | return false; | 
|---|
| 296 |  | 
|---|
| 297 | /* Decode instructions */ | 
|---|
| 298 | addr = paddr - offset; | 
|---|
| 299 | while (addr < paddr) { | 
|---|
| 300 | /* | 
|---|
| 301 | * Check if the instruction has been modified by another | 
|---|
| 302 | * kprobe, in which case we replace the breakpoint by the | 
|---|
| 303 | * original instruction in our buffer. | 
|---|
| 304 | * Also, jump optimization will change the breakpoint to | 
|---|
| 305 | * relative-jump. Since the relative-jump itself is | 
|---|
| 306 | * normally used, we just go through if there is no kprobe. | 
|---|
| 307 | */ | 
|---|
| 308 | __addr = recover_probed_instruction(buf, addr); | 
|---|
| 309 | if (!__addr) | 
|---|
| 310 | return false; | 
|---|
| 311 |  | 
|---|
| 312 | if (insn_decode_kernel(&insn, (void *)__addr) < 0) | 
|---|
| 313 | return false; | 
|---|
| 314 |  | 
|---|
| 315 | #ifdef CONFIG_KGDB | 
|---|
| 316 | /* | 
|---|
| 317 | * If there is a dynamically installed kgdb sw breakpoint, | 
|---|
| 318 | * this function should not be probed. | 
|---|
| 319 | */ | 
|---|
| 320 | if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && | 
|---|
| 321 | kgdb_has_hit_break(addr)) | 
|---|
| 322 | return false; | 
|---|
| 323 | #endif | 
|---|
| 324 | addr += insn.length; | 
|---|
| 325 | } | 
|---|
| 326 |  | 
|---|
| 327 | /* Check if paddr is at an instruction boundary */ | 
|---|
| 328 | if (addr != paddr) | 
|---|
| 329 | return false; | 
|---|
| 330 |  | 
|---|
| 331 | __addr = recover_probed_instruction(buf, addr); | 
|---|
| 332 | if (!__addr) | 
|---|
| 333 | return false; | 
|---|
| 334 |  | 
|---|
| 335 | if (insn_decode_kernel(&insn, (void *)__addr) < 0) | 
|---|
| 336 | return false; | 
|---|
| 337 |  | 
|---|
| 338 | /* INT and UD are special and should not be kprobed */ | 
|---|
| 339 | if (is_exception_insn(insn: &insn)) | 
|---|
| 340 | return false; | 
|---|
| 341 |  | 
|---|
| 342 | if (IS_ENABLED(CONFIG_CFI)) { | 
|---|
| 343 | /* | 
|---|
| 344 | * The compiler generates the following instruction sequence | 
|---|
| 345 | * for indirect call checks and cfi.c decodes this; | 
|---|
| 346 | * | 
|---|
| 347 | *   movl    -<id>, %r10d       ; 6 bytes | 
|---|
| 348 | *   addl    -4(%reg), %r10d    ; 4 bytes | 
|---|
| 349 | *   je      .Ltmp1             ; 2 bytes | 
|---|
| 350 | *   ud2                        ; <- regs->ip | 
|---|
| 351 | *   .Ltmp1: | 
|---|
| 352 | * | 
|---|
| 353 | * Also, these movl and addl are used for showing expected | 
|---|
| 354 | * type. So those must not be touched. | 
|---|
| 355 | */ | 
|---|
| 356 | if (insn.opcode.value == 0xBA) | 
|---|
| 357 | offset = 12; | 
|---|
| 358 | else if (insn.opcode.value == 0x3) | 
|---|
| 359 | offset = 6; | 
|---|
| 360 | else | 
|---|
| 361 | goto out; | 
|---|
| 362 |  | 
|---|
| 363 | /* This movl/addl is used for decoding CFI. */ | 
|---|
| 364 | if (is_cfi_trap(addr: addr + offset)) | 
|---|
| 365 | return false; | 
|---|
| 366 | } | 
|---|
| 367 |  | 
|---|
| 368 | out: | 
|---|
| 369 | return true; | 
|---|
| 370 | } | 
|---|
| 371 |  | 
|---|
| 372 | /* If x86 supports IBT (ENDBR) it must be skipped. */ | 
|---|
| 373 | kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, | 
|---|
| 374 | bool *on_func_entry) | 
|---|
| 375 | { | 
|---|
| 376 | if (is_endbr(val: (u32 *)addr)) { | 
|---|
| 377 | *on_func_entry = !offset || offset == 4; | 
|---|
| 378 | if (*on_func_entry) | 
|---|
| 379 | offset = 4; | 
|---|
| 380 |  | 
|---|
| 381 | } else { | 
|---|
| 382 | *on_func_entry = !offset; | 
|---|
| 383 | } | 
|---|
| 384 |  | 
|---|
| 385 | return (kprobe_opcode_t *)(addr + offset); | 
|---|
| 386 | } | 
|---|
| 387 |  | 
|---|
| 388 | /* | 
|---|
| 389 | * Copy an instruction with recovering modified instruction by kprobes | 
|---|
| 390 | * and adjust the displacement if the instruction uses the %rip-relative | 
|---|
| 391 | * addressing mode. Note that since @real will be the final place of copied | 
|---|
| 392 | * instruction, displacement must be adjust by @real, not @dest. | 
|---|
| 393 | * This returns the length of copied instruction, or 0 if it has an error. | 
|---|
| 394 | */ | 
|---|
| 395 | int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) | 
|---|
| 396 | { | 
|---|
| 397 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 
|---|
| 398 | unsigned long recovered_insn = recover_probed_instruction(buf, addr: (unsigned long)src); | 
|---|
| 399 | int ret; | 
|---|
| 400 |  | 
|---|
| 401 | if (!recovered_insn || !insn) | 
|---|
| 402 | return 0; | 
|---|
| 403 |  | 
|---|
| 404 | /* This can access kernel text if given address is not recovered */ | 
|---|
| 405 | if (copy_from_kernel_nofault(dst: dest, src: (void *)recovered_insn, | 
|---|
| 406 | MAX_INSN_SIZE)) | 
|---|
| 407 | return 0; | 
|---|
| 408 |  | 
|---|
| 409 | ret = insn_decode_kernel(insn, dest); | 
|---|
| 410 | if (ret < 0) | 
|---|
| 411 | return 0; | 
|---|
| 412 |  | 
|---|
| 413 | /* We can not probe force emulate prefixed instruction */ | 
|---|
| 414 | if (insn_has_emulate_prefix(insn)) | 
|---|
| 415 | return 0; | 
|---|
| 416 |  | 
|---|
| 417 | /* Another subsystem puts a breakpoint, failed to recover */ | 
|---|
| 418 | if (insn->opcode.bytes[0] == INT3_INSN_OPCODE) | 
|---|
| 419 | return 0; | 
|---|
| 420 |  | 
|---|
| 421 | /* We should not singlestep on the exception masking instructions */ | 
|---|
| 422 | if (insn_masking_exception(insn)) | 
|---|
| 423 | return 0; | 
|---|
| 424 |  | 
|---|
| 425 | #ifdef CONFIG_X86_64 | 
|---|
| 426 | /* Only x86_64 has RIP relative instructions */ | 
|---|
| 427 | if (insn_rip_relative(insn)) { | 
|---|
| 428 | s64 newdisp; | 
|---|
| 429 | u8 *disp; | 
|---|
| 430 | /* | 
|---|
| 431 | * The copied instruction uses the %rip-relative addressing | 
|---|
| 432 | * mode.  Adjust the displacement for the difference between | 
|---|
| 433 | * the original location of this instruction and the location | 
|---|
| 434 | * of the copy that will actually be run.  The tricky bit here | 
|---|
| 435 | * is making sure that the sign extension happens correctly in | 
|---|
| 436 | * this calculation, since we need a signed 32-bit result to | 
|---|
| 437 | * be sign-extended to 64 bits when it's added to the %rip | 
|---|
| 438 | * value and yield the same 64-bit result that the sign- | 
|---|
| 439 | * extension of the original signed 32-bit displacement would | 
|---|
| 440 | * have given. | 
|---|
| 441 | */ | 
|---|
| 442 | newdisp = (u8 *) src + (s64) insn->displacement.value | 
|---|
| 443 | - (u8 *) real; | 
|---|
| 444 | if ((s64) (s32) newdisp != newdisp) { | 
|---|
| 445 | pr_err( "Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); | 
|---|
| 446 | return 0; | 
|---|
| 447 | } | 
|---|
| 448 | disp = (u8 *) dest + insn_offset_displacement(insn); | 
|---|
| 449 | *(s32 *) disp = (s32) newdisp; | 
|---|
| 450 | } | 
|---|
| 451 | #endif | 
|---|
| 452 | return insn->length; | 
|---|
| 453 | } | 
|---|
| 454 |  | 
|---|
| 455 | /* Prepare reljump or int3 right after instruction */ | 
|---|
| 456 | static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p, | 
|---|
| 457 | struct insn *insn) | 
|---|
| 458 | { | 
|---|
| 459 | int len = insn->length; | 
|---|
| 460 |  | 
|---|
| 461 | if (!IS_ENABLED(CONFIG_PREEMPTION) && | 
|---|
| 462 | !p->post_handler && can_boost(insn, addr: p->addr) && | 
|---|
| 463 | MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) { | 
|---|
| 464 | /* | 
|---|
| 465 | * These instructions can be executed directly if it | 
|---|
| 466 | * jumps back to correct address. | 
|---|
| 467 | */ | 
|---|
| 468 | synthesize_reljump(dest: buf + len, from: p->ainsn.insn + len, | 
|---|
| 469 | to: p->addr + insn->length); | 
|---|
| 470 | len += JMP32_INSN_SIZE; | 
|---|
| 471 | p->ainsn.boostable = 1; | 
|---|
| 472 | } else { | 
|---|
| 473 | /* Otherwise, put an int3 for trapping singlestep */ | 
|---|
| 474 | if (MAX_INSN_SIZE - len < INT3_INSN_SIZE) | 
|---|
| 475 | return -ENOSPC; | 
|---|
| 476 |  | 
|---|
| 477 | buf[len] = INT3_INSN_OPCODE; | 
|---|
| 478 | len += INT3_INSN_SIZE; | 
|---|
| 479 | } | 
|---|
| 480 |  | 
|---|
| 481 | return len; | 
|---|
| 482 | } | 
|---|
| 483 |  | 
|---|
| 484 | /* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */ | 
|---|
| 485 |  | 
|---|
| 486 | static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs) | 
|---|
| 487 | { | 
|---|
| 488 | switch (p->ainsn.opcode) { | 
|---|
| 489 | case 0xfa:	/* cli */ | 
|---|
| 490 | regs->flags &= ~(X86_EFLAGS_IF); | 
|---|
| 491 | break; | 
|---|
| 492 | case 0xfb:	/* sti */ | 
|---|
| 493 | regs->flags |= X86_EFLAGS_IF; | 
|---|
| 494 | break; | 
|---|
| 495 | case 0x9c:	/* pushf */ | 
|---|
| 496 | int3_emulate_push(regs, val: regs->flags); | 
|---|
| 497 | break; | 
|---|
| 498 | case 0x9d:	/* popf */ | 
|---|
| 499 | regs->flags = int3_emulate_pop(regs); | 
|---|
| 500 | break; | 
|---|
| 501 | } | 
|---|
| 502 | regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; | 
|---|
| 503 | } | 
|---|
| 504 | NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers); | 
|---|
| 505 |  | 
|---|
/* Emulate a return by delegating to int3_emulate_ret(); @p is not used. */
static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs)
{
	int3_emulate_ret(regs);
}
NOKPROBE_SYMBOL(kprobe_emulate_ret);
|---|
| 511 |  | 
|---|
| 512 | static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs) | 
|---|
| 513 | { | 
|---|
| 514 | unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size; | 
|---|
| 515 |  | 
|---|
| 516 | func += p->ainsn.rel32; | 
|---|
| 517 | int3_emulate_call(regs, func); | 
|---|
| 518 | } | 
|---|
| 519 | NOKPROBE_SYMBOL(kprobe_emulate_call); | 
|---|
| 520 |  | 
|---|
| 521 | static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) | 
|---|
| 522 | { | 
|---|
| 523 | unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; | 
|---|
| 524 |  | 
|---|
| 525 | ip += p->ainsn.rel32; | 
|---|
| 526 | int3_emulate_jmp(regs, ip); | 
|---|
| 527 | } | 
|---|
| 528 | NOKPROBE_SYMBOL(kprobe_emulate_jmp); | 
|---|
| 529 |  | 
|---|
| 530 | static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) | 
|---|
| 531 | { | 
|---|
| 532 | unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; | 
|---|
| 533 |  | 
|---|
| 534 | int3_emulate_jcc(regs, cc: p->ainsn.jcc.type, ip, disp: p->ainsn.rel32); | 
|---|
| 535 | } | 
|---|
| 536 | NOKPROBE_SYMBOL(kprobe_emulate_jcc); | 
|---|
| 537 |  | 
|---|
| 538 | static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) | 
|---|
| 539 | { | 
|---|
| 540 | unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; | 
|---|
| 541 | bool match; | 
|---|
| 542 |  | 
|---|
| 543 | if (p->ainsn.loop.type != 3) {	/* LOOP* */ | 
|---|
| 544 | if (p->ainsn.loop.asize == 32) | 
|---|
| 545 | match = ((*(u32 *)®s->cx)--) != 0; | 
|---|
| 546 | #ifdef CONFIG_X86_64 | 
|---|
| 547 | else if (p->ainsn.loop.asize == 64) | 
|---|
| 548 | match = ((*(u64 *)®s->cx)--) != 0; | 
|---|
| 549 | #endif | 
|---|
| 550 | else | 
|---|
| 551 | match = ((*(u16 *)®s->cx)--) != 0; | 
|---|
| 552 | } else {			/* JCXZ */ | 
|---|
| 553 | if (p->ainsn.loop.asize == 32) | 
|---|
| 554 | match = *(u32 *)(®s->cx) == 0; | 
|---|
| 555 | #ifdef CONFIG_X86_64 | 
|---|
| 556 | else if (p->ainsn.loop.asize == 64) | 
|---|
| 557 | match = *(u64 *)(®s->cx) == 0; | 
|---|
| 558 | #endif | 
|---|
| 559 | else | 
|---|
| 560 | match = *(u16 *)(®s->cx) == 0; | 
|---|
| 561 | } | 
|---|
| 562 |  | 
|---|
| 563 | if (p->ainsn.loop.type == 0)	/* LOOPNE */ | 
|---|
| 564 | match = match && !(regs->flags & X86_EFLAGS_ZF); | 
|---|
| 565 | else if (p->ainsn.loop.type == 1)	/* LOOPE */ | 
|---|
| 566 | match = match && (regs->flags & X86_EFLAGS_ZF); | 
|---|
| 567 |  | 
|---|
| 568 | if (match) | 
|---|
| 569 | ip += p->ainsn.rel32; | 
|---|
| 570 | int3_emulate_jmp(regs, ip); | 
|---|
| 571 | } | 
|---|
| 572 | NOKPROBE_SYMBOL(kprobe_emulate_loop); | 
|---|
| 573 |  | 
|---|
| 574 | static const int addrmode_regoffs[] = { | 
|---|
| 575 | offsetof(struct pt_regs, ax), | 
|---|
| 576 | offsetof(struct pt_regs, cx), | 
|---|
| 577 | offsetof(struct pt_regs, dx), | 
|---|
| 578 | offsetof(struct pt_regs, bx), | 
|---|
| 579 | offsetof(struct pt_regs, sp), | 
|---|
| 580 | offsetof(struct pt_regs, bp), | 
|---|
| 581 | offsetof(struct pt_regs, si), | 
|---|
| 582 | offsetof(struct pt_regs, di), | 
|---|
| 583 | #ifdef CONFIG_X86_64 | 
|---|
| 584 | offsetof(struct pt_regs, r8), | 
|---|
| 585 | offsetof(struct pt_regs, r9), | 
|---|
| 586 | offsetof(struct pt_regs, r10), | 
|---|
| 587 | offsetof(struct pt_regs, r11), | 
|---|
| 588 | offsetof(struct pt_regs, r12), | 
|---|
| 589 | offsetof(struct pt_regs, r13), | 
|---|
| 590 | offsetof(struct pt_regs, r14), | 
|---|
| 591 | offsetof(struct pt_regs, r15), | 
|---|
| 592 | #endif | 
|---|
| 593 | }; | 
|---|
| 594 |  | 
|---|
| 595 | static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs) | 
|---|
| 596 | { | 
|---|
| 597 | unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; | 
|---|
| 598 |  | 
|---|
| 599 | int3_emulate_push(regs, val: regs->ip - INT3_INSN_SIZE + p->ainsn.size); | 
|---|
| 600 | int3_emulate_jmp(regs, ip: regs_get_register(regs, offset: offs)); | 
|---|
| 601 | } | 
|---|
| 602 | NOKPROBE_SYMBOL(kprobe_emulate_call_indirect); | 
|---|
| 603 |  | 
|---|
| 604 | static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs) | 
|---|
| 605 | { | 
|---|
| 606 | unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; | 
|---|
| 607 |  | 
|---|
| 608 | int3_emulate_jmp(regs, ip: regs_get_register(regs, offset: offs)); | 
|---|
| 609 | } | 
|---|
| 610 | NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect); | 
|---|
| 611 |  | 
|---|
| 612 | static int prepare_emulation(struct kprobe *p, struct insn *insn) | 
|---|
| 613 | { | 
|---|
| 614 | insn_byte_t opcode = insn->opcode.bytes[0]; | 
|---|
| 615 |  | 
|---|
| 616 | switch (opcode) { | 
|---|
| 617 | case 0xfa:		/* cli */ | 
|---|
| 618 | case 0xfb:		/* sti */ | 
|---|
| 619 | case 0x9c:		/* pushfl */ | 
|---|
| 620 | case 0x9d:		/* popf/popfd */ | 
|---|
| 621 | /* | 
|---|
| 622 | * IF modifiers must be emulated since it will enable interrupt while | 
|---|
| 623 | * int3 single stepping. | 
|---|
| 624 | */ | 
|---|
| 625 | p->ainsn.emulate_op = kprobe_emulate_ifmodifiers; | 
|---|
| 626 | p->ainsn.opcode = opcode; | 
|---|
| 627 | break; | 
|---|
| 628 | case 0xc2:	/* ret/lret */ | 
|---|
| 629 | case 0xc3: | 
|---|
| 630 | case 0xca: | 
|---|
| 631 | case 0xcb: | 
|---|
| 632 | p->ainsn.emulate_op = kprobe_emulate_ret; | 
|---|
| 633 | break; | 
|---|
| 634 | case 0x9a:	/* far call absolute -- segment is not supported */ | 
|---|
| 635 | case 0xea:	/* far jmp absolute -- segment is not supported */ | 
|---|
| 636 | case 0xcc:	/* int3 */ | 
|---|
| 637 | case 0xcf:	/* iret -- in-kernel IRET is not supported */ | 
|---|
| 638 | return -EOPNOTSUPP; | 
|---|
| 639 | break; | 
|---|
| 640 | case 0xe8:	/* near call relative */ | 
|---|
| 641 | p->ainsn.emulate_op = kprobe_emulate_call; | 
|---|
| 642 | if (insn->immediate.nbytes == 2) | 
|---|
| 643 | p->ainsn.rel32 = *(s16 *)&insn->immediate.value; | 
|---|
| 644 | else | 
|---|
| 645 | p->ainsn.rel32 = *(s32 *)&insn->immediate.value; | 
|---|
| 646 | break; | 
|---|
| 647 | case 0xeb:	/* short jump relative */ | 
|---|
| 648 | case 0xe9:	/* near jump relative */ | 
|---|
| 649 | p->ainsn.emulate_op = kprobe_emulate_jmp; | 
|---|
| 650 | if (insn->immediate.nbytes == 1) | 
|---|
| 651 | p->ainsn.rel32 = *(s8 *)&insn->immediate.value; | 
|---|
| 652 | else if (insn->immediate.nbytes == 2) | 
|---|
| 653 | p->ainsn.rel32 = *(s16 *)&insn->immediate.value; | 
|---|
| 654 | else | 
|---|
| 655 | p->ainsn.rel32 = *(s32 *)&insn->immediate.value; | 
|---|
| 656 | break; | 
|---|
| 657 | case 0x70 ... 0x7f: | 
|---|
| 658 | /* 1 byte conditional jump */ | 
|---|
| 659 | p->ainsn.emulate_op = kprobe_emulate_jcc; | 
|---|
| 660 | p->ainsn.jcc.type = opcode & 0xf; | 
|---|
| 661 | p->ainsn.rel32 = insn->immediate.value; | 
|---|
| 662 | break; | 
|---|
| 663 | case 0x0f: | 
|---|
| 664 | opcode = insn->opcode.bytes[1]; | 
|---|
| 665 | if ((opcode & 0xf0) == 0x80) { | 
|---|
| 666 | /* 2 bytes Conditional Jump */ | 
|---|
| 667 | p->ainsn.emulate_op = kprobe_emulate_jcc; | 
|---|
| 668 | p->ainsn.jcc.type = opcode & 0xf; | 
|---|
| 669 | if (insn->immediate.nbytes == 2) | 
|---|
| 670 | p->ainsn.rel32 = *(s16 *)&insn->immediate.value; | 
|---|
| 671 | else | 
|---|
| 672 | p->ainsn.rel32 = *(s32 *)&insn->immediate.value; | 
|---|
| 673 | } else if (opcode == 0x01 && | 
|---|
| 674 | X86_MODRM_REG(insn->modrm.bytes[0]) == 0 && | 
|---|
| 675 | X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) { | 
|---|
| 676 | /* VM extensions - not supported */ | 
|---|
| 677 | return -EOPNOTSUPP; | 
|---|
| 678 | } | 
|---|
| 679 | break; | 
|---|
| 680 | case 0xe0:	/* Loop NZ */ | 
|---|
| 681 | case 0xe1:	/* Loop */ | 
|---|
| 682 | case 0xe2:	/* Loop */ | 
|---|
| 683 | case 0xe3:	/* J*CXZ */ | 
|---|
| 684 | p->ainsn.emulate_op = kprobe_emulate_loop; | 
|---|
| 685 | p->ainsn.loop.type = opcode & 0x3; | 
|---|
| 686 | p->ainsn.loop.asize = insn->addr_bytes * 8; | 
|---|
| 687 | p->ainsn.rel32 = *(s8 *)&insn->immediate.value; | 
|---|
| 688 | break; | 
|---|
| 689 | case 0xff: | 
|---|
| 690 | /* | 
|---|
| 691 | * Since the 0xff is an extended group opcode, the instruction | 
|---|
| 692 | * is determined by the MOD/RM byte. | 
|---|
| 693 | */ | 
|---|
| 694 | opcode = insn->modrm.bytes[0]; | 
|---|
| 695 | switch (X86_MODRM_REG(opcode)) { | 
|---|
| 696 | case 0b010:	/* FF /2, call near, absolute indirect */ | 
|---|
| 697 | p->ainsn.emulate_op = kprobe_emulate_call_indirect; | 
|---|
| 698 | break; | 
|---|
| 699 | case 0b100:	/* FF /4, jmp near, absolute indirect */ | 
|---|
| 700 | p->ainsn.emulate_op = kprobe_emulate_jmp_indirect; | 
|---|
| 701 | break; | 
|---|
| 702 | case 0b011:	/* FF /3, call far, absolute indirect */ | 
|---|
| 703 | case 0b101:	/* FF /5, jmp far, absolute indirect */ | 
|---|
| 704 | return -EOPNOTSUPP; | 
|---|
| 705 | } | 
|---|
| 706 |  | 
|---|
| 707 | if (!p->ainsn.emulate_op) | 
|---|
| 708 | break; | 
|---|
| 709 |  | 
|---|
| 710 | if (insn->addr_bytes != sizeof(unsigned long)) | 
|---|
| 711 | return -EOPNOTSUPP;	/* Don't support different size */ | 
|---|
| 712 | if (X86_MODRM_MOD(opcode) != 3) | 
|---|
| 713 | return -EOPNOTSUPP;	/* TODO: support memory addressing */ | 
|---|
| 714 |  | 
|---|
| 715 | p->ainsn.indirect.reg = X86_MODRM_RM(opcode); | 
|---|
| 716 | #ifdef CONFIG_X86_64 | 
|---|
| 717 | if (X86_REX_B(insn->rex_prefix.value)) | 
|---|
| 718 | p->ainsn.indirect.reg += 8; | 
|---|
| 719 | #endif | 
|---|
| 720 | break; | 
|---|
| 721 | default: | 
|---|
| 722 | break; | 
|---|
| 723 | } | 
|---|
| 724 | p->ainsn.size = insn->length; | 
|---|
| 725 |  | 
|---|
| 726 | return 0; | 
|---|
| 727 | } | 
|---|
| 728 |  | 
|---|
| 729 | static int arch_copy_kprobe(struct kprobe *p) | 
|---|
| 730 | { | 
|---|
| 731 | struct insn insn; | 
|---|
| 732 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 
|---|
| 733 | int ret, len; | 
|---|
| 734 |  | 
|---|
| 735 | /* Copy an instruction with recovering if other optprobe modifies it.*/ | 
|---|
| 736 | len = __copy_instruction(dest: buf, src: p->addr, real: p->ainsn.insn, insn: &insn); | 
|---|
| 737 | if (!len) | 
|---|
| 738 | return -EINVAL; | 
|---|
| 739 |  | 
|---|
| 740 | /* Analyze the opcode and setup emulate functions */ | 
|---|
| 741 | ret = prepare_emulation(p, insn: &insn); | 
|---|
| 742 | if (ret < 0) | 
|---|
| 743 | return ret; | 
|---|
| 744 |  | 
|---|
| 745 | /* Add int3 for single-step or booster jmp */ | 
|---|
| 746 | len = prepare_singlestep(buf, p, insn: &insn); | 
|---|
| 747 | if (len < 0) | 
|---|
| 748 | return len; | 
|---|
| 749 |  | 
|---|
| 750 | /* Also, displacement change doesn't affect the first byte */ | 
|---|
| 751 | p->opcode = buf[0]; | 
|---|
| 752 |  | 
|---|
| 753 | p->ainsn.tp_len = len; | 
|---|
| 754 | perf_event_text_poke(addr: p->ainsn.insn, NULL, old_len: 0, new_bytes: buf, new_len: len); | 
|---|
| 755 |  | 
|---|
| 756 | /* OK, write back the instruction(s) into ROX insn buffer */ | 
|---|
| 757 | text_poke(addr: p->ainsn.insn, opcode: buf, len); | 
|---|
| 758 |  | 
|---|
| 759 | return 0; | 
|---|
| 760 | } | 
|---|
| 761 |  | 
|---|
| 762 | int arch_prepare_kprobe(struct kprobe *p) | 
|---|
| 763 | { | 
|---|
| 764 | int ret; | 
|---|
| 765 |  | 
|---|
| 766 | if (alternatives_text_reserved(start: p->addr, end: p->addr)) | 
|---|
| 767 | return -EINVAL; | 
|---|
| 768 |  | 
|---|
| 769 | if (!can_probe(paddr: (unsigned long)p->addr)) | 
|---|
| 770 | return -EILSEQ; | 
|---|
| 771 |  | 
|---|
| 772 | memset(s: &p->ainsn, c: 0, n: sizeof(p->ainsn)); | 
|---|
| 773 |  | 
|---|
| 774 | /* insn: must be on special executable page on x86. */ | 
|---|
| 775 | p->ainsn.insn = get_insn_slot(); | 
|---|
| 776 | if (!p->ainsn.insn) | 
|---|
| 777 | return -ENOMEM; | 
|---|
| 778 |  | 
|---|
| 779 | ret = arch_copy_kprobe(p); | 
|---|
| 780 | if (ret) { | 
|---|
| 781 | free_insn_slot(slot: p->ainsn.insn, dirty: 0); | 
|---|
| 782 | p->ainsn.insn = NULL; | 
|---|
| 783 | } | 
|---|
| 784 |  | 
|---|
| 785 | return ret; | 
|---|
| 786 | } | 
|---|
| 787 |  | 
|---|
| 788 | void arch_arm_kprobe(struct kprobe *p) | 
|---|
| 789 | { | 
|---|
| 790 | u8 int3 = INT3_INSN_OPCODE; | 
|---|
| 791 |  | 
|---|
| 792 | text_poke(addr: p->addr, opcode: &int3, len: 1); | 
|---|
| 793 | smp_text_poke_sync_each_cpu(); | 
|---|
| 794 | perf_event_text_poke(addr: p->addr, old_bytes: &p->opcode, old_len: 1, new_bytes: &int3, new_len: 1); | 
|---|
| 795 | } | 
|---|
| 796 |  | 
|---|
| 797 | void arch_disarm_kprobe(struct kprobe *p) | 
|---|
| 798 | { | 
|---|
| 799 | u8 int3 = INT3_INSN_OPCODE; | 
|---|
| 800 |  | 
|---|
| 801 | perf_event_text_poke(addr: p->addr, old_bytes: &int3, old_len: 1, new_bytes: &p->opcode, new_len: 1); | 
|---|
| 802 | text_poke(addr: p->addr, opcode: &p->opcode, len: 1); | 
|---|
| 803 | smp_text_poke_sync_each_cpu(); | 
|---|
| 804 | } | 
|---|
| 805 |  | 
|---|
| 806 | void arch_remove_kprobe(struct kprobe *p) | 
|---|
| 807 | { | 
|---|
| 808 | if (p->ainsn.insn) { | 
|---|
| 809 | /* Record the perf event before freeing the slot */ | 
|---|
| 810 | perf_event_text_poke(addr: p->ainsn.insn, old_bytes: p->ainsn.insn, | 
|---|
| 811 | old_len: p->ainsn.tp_len, NULL, new_len: 0); | 
|---|
| 812 | free_insn_slot(slot: p->ainsn.insn, dirty: p->ainsn.boostable); | 
|---|
| 813 | p->ainsn.insn = NULL; | 
|---|
| 814 | } | 
|---|
| 815 | } | 
|---|
| 816 |  | 
|---|
| 817 | static nokprobe_inline void | 
|---|
| 818 | save_previous_kprobe(struct kprobe_ctlblk *kcb) | 
|---|
| 819 | { | 
|---|
| 820 | kcb->prev_kprobe.kp = kprobe_running(); | 
|---|
| 821 | kcb->prev_kprobe.status = kcb->kprobe_status; | 
|---|
| 822 | kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags; | 
|---|
| 823 | kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; | 
|---|
| 824 | } | 
|---|
| 825 |  | 
|---|
| 826 | static nokprobe_inline void | 
|---|
| 827 | restore_previous_kprobe(struct kprobe_ctlblk *kcb) | 
|---|
| 828 | { | 
|---|
| 829 | __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); | 
|---|
| 830 | kcb->kprobe_status = kcb->prev_kprobe.status; | 
|---|
| 831 | kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; | 
|---|
| 832 | kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; | 
|---|
| 833 | } | 
|---|
| 834 |  | 
|---|
| 835 | static nokprobe_inline void | 
|---|
| 836 | set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | 
|---|
| 837 | struct kprobe_ctlblk *kcb) | 
|---|
| 838 | { | 
|---|
| 839 | __this_cpu_write(current_kprobe, p); | 
|---|
| 840 | kcb->kprobe_saved_flags = kcb->kprobe_old_flags | 
|---|
| 841 | = (regs->flags & X86_EFLAGS_IF); | 
|---|
| 842 | } | 
|---|
| 843 |  | 
|---|
| 844 | static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, | 
|---|
| 845 | struct kprobe_ctlblk *kcb) | 
|---|
| 846 | { | 
|---|
| 847 | /* Restore back the original saved kprobes variables and continue. */ | 
|---|
| 848 | if (kcb->kprobe_status == KPROBE_REENTER) { | 
|---|
| 849 | /* This will restore both kcb and current_kprobe */ | 
|---|
| 850 | restore_previous_kprobe(kcb); | 
|---|
| 851 | } else { | 
|---|
| 852 | /* | 
|---|
| 853 | * Always update the kcb status because | 
|---|
| 854 | * reset_curent_kprobe() doesn't update kcb. | 
|---|
| 855 | */ | 
|---|
| 856 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | 
|---|
| 857 | if (cur->post_handler) | 
|---|
| 858 | cur->post_handler(cur, regs, 0); | 
|---|
| 859 | reset_current_kprobe(); | 
|---|
| 860 | } | 
|---|
| 861 | } | 
|---|
| 862 | NOKPROBE_SYMBOL(kprobe_post_process); | 
|---|
| 863 |  | 
|---|
| 864 | static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, | 
|---|
| 865 | struct kprobe_ctlblk *kcb, int reenter) | 
|---|
| 866 | { | 
|---|
| 867 | if (setup_detour_execution(p, regs, reenter)) | 
|---|
| 868 | return; | 
|---|
| 869 |  | 
|---|
| 870 | #if !defined(CONFIG_PREEMPTION) | 
|---|
| 871 | if (p->ainsn.boostable) { | 
|---|
| 872 | /* Boost up -- we can execute copied instructions directly */ | 
|---|
| 873 | if (!reenter) | 
|---|
| 874 | reset_current_kprobe(); | 
|---|
| 875 | /* | 
|---|
| 876 | * Reentering boosted probe doesn't reset current_kprobe, | 
|---|
| 877 | * nor set current_kprobe, because it doesn't use single | 
|---|
| 878 | * stepping. | 
|---|
| 879 | */ | 
|---|
| 880 | regs->ip = (unsigned long)p->ainsn.insn; | 
|---|
| 881 | return; | 
|---|
| 882 | } | 
|---|
| 883 | #endif | 
|---|
| 884 | if (reenter) { | 
|---|
| 885 | save_previous_kprobe(kcb); | 
|---|
| 886 | set_current_kprobe(p, regs, kcb); | 
|---|
| 887 | kcb->kprobe_status = KPROBE_REENTER; | 
|---|
| 888 | } else | 
|---|
| 889 | kcb->kprobe_status = KPROBE_HIT_SS; | 
|---|
| 890 |  | 
|---|
| 891 | if (p->ainsn.emulate_op) { | 
|---|
| 892 | p->ainsn.emulate_op(p, regs); | 
|---|
| 893 | kprobe_post_process(cur: p, regs, kcb); | 
|---|
| 894 | return; | 
|---|
| 895 | } | 
|---|
| 896 |  | 
|---|
| 897 | /* Disable interrupt, and set ip register on trampoline */ | 
|---|
| 898 | regs->flags &= ~X86_EFLAGS_IF; | 
|---|
| 899 | regs->ip = (unsigned long)p->ainsn.insn; | 
|---|
| 900 | } | 
|---|
| 901 | NOKPROBE_SYMBOL(setup_singlestep); | 
|---|
| 902 |  | 
|---|
| 903 | /* | 
|---|
| 904 | * Called after single-stepping.  p->addr is the address of the | 
|---|
| 905 | * instruction whose first byte has been replaced by the "int3" | 
|---|
| 906 | * instruction.  To avoid the SMP problems that can occur when we | 
|---|
| 907 | * temporarily put back the original opcode to single-step, we | 
|---|
| 908 | * single-stepped a copy of the instruction.  The address of this | 
|---|
| 909 | * copy is p->ainsn.insn. We also doesn't use trap, but "int3" again | 
|---|
| 910 | * right after the copied instruction. | 
|---|
| 911 | * Different from the trap single-step, "int3" single-step can not | 
|---|
| 912 | * handle the instruction which changes the ip register, e.g. jmp, | 
|---|
| 913 | * call, conditional jmp, and the instructions which changes the IF | 
|---|
| 914 | * flags because interrupt must be disabled around the single-stepping. | 
|---|
| 915 | * Such instructions are software emulated, but others are single-stepped | 
|---|
| 916 | * using "int3". | 
|---|
| 917 | * | 
|---|
| 918 | * When the 2nd "int3" handled, the regs->ip and regs->flags needs to | 
|---|
| 919 | * be adjusted, so that we can resume execution on correct code. | 
|---|
| 920 | */ | 
|---|
| 921 | static void resume_singlestep(struct kprobe *p, struct pt_regs *regs, | 
|---|
| 922 | struct kprobe_ctlblk *kcb) | 
|---|
| 923 | { | 
|---|
| 924 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; | 
|---|
| 925 | unsigned long orig_ip = (unsigned long)p->addr; | 
|---|
| 926 |  | 
|---|
| 927 | /* Restore saved interrupt flag and ip register */ | 
|---|
| 928 | regs->flags |= kcb->kprobe_saved_flags; | 
|---|
| 929 | /* Note that regs->ip is executed int3 so must be a step back */ | 
|---|
| 930 | regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE; | 
|---|
| 931 | } | 
|---|
| 932 | NOKPROBE_SYMBOL(resume_singlestep); | 
|---|
| 933 |  | 
|---|
| 934 | /* | 
|---|
| 935 | * We have reentered the kprobe_handler(), since another probe was hit while | 
|---|
| 936 | * within the handler. We save the original kprobes variables and just single | 
|---|
| 937 | * step on the instruction of the new probe without calling any user handlers. | 
|---|
| 938 | */ | 
|---|
| 939 | static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | 
|---|
| 940 | struct kprobe_ctlblk *kcb) | 
|---|
| 941 | { | 
|---|
| 942 | switch (kcb->kprobe_status) { | 
|---|
| 943 | case KPROBE_HIT_SSDONE: | 
|---|
| 944 | case KPROBE_HIT_ACTIVE: | 
|---|
| 945 | case KPROBE_HIT_SS: | 
|---|
| 946 | kprobes_inc_nmissed_count(p); | 
|---|
| 947 | setup_singlestep(p, regs, kcb, reenter: 1); | 
|---|
| 948 | break; | 
|---|
| 949 | case KPROBE_REENTER: | 
|---|
| 950 | /* A probe has been hit in the codepath leading up to, or just | 
|---|
| 951 | * after, single-stepping of a probed instruction. This entire | 
|---|
| 952 | * codepath should strictly reside in .kprobes.text section. | 
|---|
| 953 | * Raise a BUG or we'll continue in an endless reentering loop | 
|---|
| 954 | * and eventually a stack overflow. | 
|---|
| 955 | */ | 
|---|
| 956 | pr_err( "Unrecoverable kprobe detected.\n"); | 
|---|
| 957 | dump_kprobe(kp: p); | 
|---|
| 958 | BUG(); | 
|---|
| 959 | default: | 
|---|
| 960 | /* impossible cases */ | 
|---|
| 961 | WARN_ON(1); | 
|---|
| 962 | return 0; | 
|---|
| 963 | } | 
|---|
| 964 |  | 
|---|
| 965 | return 1; | 
|---|
| 966 | } | 
|---|
| 967 | NOKPROBE_SYMBOL(reenter_kprobe); | 
|---|
| 968 |  | 
|---|
| 969 | static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb) | 
|---|
| 970 | { | 
|---|
| 971 | return (kcb->kprobe_status == KPROBE_HIT_SS || | 
|---|
| 972 | kcb->kprobe_status == KPROBE_REENTER); | 
|---|
| 973 | } | 
|---|
| 974 |  | 
|---|
| 975 | /* | 
|---|
| 976 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | 
|---|
| 977 | * remain disabled throughout this function. | 
|---|
| 978 | */ | 
|---|
| 979 | int kprobe_int3_handler(struct pt_regs *regs) | 
|---|
| 980 | { | 
|---|
| 981 | kprobe_opcode_t *addr; | 
|---|
| 982 | struct kprobe *p; | 
|---|
| 983 | struct kprobe_ctlblk *kcb; | 
|---|
| 984 |  | 
|---|
| 985 | if (user_mode(regs)) | 
|---|
| 986 | return 0; | 
|---|
| 987 |  | 
|---|
| 988 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); | 
|---|
| 989 | /* | 
|---|
| 990 | * We don't want to be preempted for the entire duration of kprobe | 
|---|
| 991 | * processing. Since int3 and debug trap disables irqs and we clear | 
|---|
| 992 | * IF while singlestepping, it must be no preemptible. | 
|---|
| 993 | */ | 
|---|
| 994 |  | 
|---|
| 995 | kcb = get_kprobe_ctlblk(); | 
|---|
| 996 | p = get_kprobe(addr); | 
|---|
| 997 |  | 
|---|
| 998 | if (p) { | 
|---|
| 999 | if (kprobe_running()) { | 
|---|
| 1000 | if (reenter_kprobe(p, regs, kcb)) | 
|---|
| 1001 | return 1; | 
|---|
| 1002 | } else { | 
|---|
| 1003 | set_current_kprobe(p, regs, kcb); | 
|---|
| 1004 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | 
|---|
| 1005 |  | 
|---|
| 1006 | /* | 
|---|
| 1007 | * If we have no pre-handler or it returned 0, we | 
|---|
| 1008 | * continue with normal processing.  If we have a | 
|---|
| 1009 | * pre-handler and it returned non-zero, that means | 
|---|
| 1010 | * user handler setup registers to exit to another | 
|---|
| 1011 | * instruction, we must skip the single stepping. | 
|---|
| 1012 | */ | 
|---|
| 1013 | if (!p->pre_handler || !p->pre_handler(p, regs)) | 
|---|
| 1014 | setup_singlestep(p, regs, kcb, reenter: 0); | 
|---|
| 1015 | else | 
|---|
| 1016 | reset_current_kprobe(); | 
|---|
| 1017 | return 1; | 
|---|
| 1018 | } | 
|---|
| 1019 | } else if (kprobe_is_ss(kcb)) { | 
|---|
| 1020 | p = kprobe_running(); | 
|---|
| 1021 | if ((unsigned long)p->ainsn.insn < regs->ip && | 
|---|
| 1022 | (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) { | 
|---|
| 1023 | /* Most provably this is the second int3 for singlestep */ | 
|---|
| 1024 | resume_singlestep(p, regs, kcb); | 
|---|
| 1025 | kprobe_post_process(cur: p, regs, kcb); | 
|---|
| 1026 | return 1; | 
|---|
| 1027 | } | 
|---|
| 1028 | } /* else: not a kprobe fault; let the kernel handle it */ | 
|---|
| 1029 |  | 
|---|
| 1030 | return 0; | 
|---|
| 1031 | } | 
|---|
| 1032 | NOKPROBE_SYMBOL(kprobe_int3_handler); | 
|---|
| 1033 |  | 
|---|
| 1034 | int kprobe_fault_handler(struct pt_regs *regs, int trapnr) | 
|---|
| 1035 | { | 
|---|
| 1036 | struct kprobe *cur = kprobe_running(); | 
|---|
| 1037 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 
|---|
| 1038 |  | 
|---|
| 1039 | if (unlikely(regs->ip == (unsigned long)cur->ainsn.insn)) { | 
|---|
| 1040 | /* This must happen on single-stepping */ | 
|---|
| 1041 | WARN_ON(kcb->kprobe_status != KPROBE_HIT_SS && | 
|---|
| 1042 | kcb->kprobe_status != KPROBE_REENTER); | 
|---|
| 1043 | /* | 
|---|
| 1044 | * We are here because the instruction being single | 
|---|
| 1045 | * stepped caused a page fault. We reset the current | 
|---|
| 1046 | * kprobe and the ip points back to the probe address | 
|---|
| 1047 | * and allow the page fault handler to continue as a | 
|---|
| 1048 | * normal page fault. | 
|---|
| 1049 | */ | 
|---|
| 1050 | regs->ip = (unsigned long)cur->addr; | 
|---|
| 1051 |  | 
|---|
| 1052 | /* | 
|---|
| 1053 | * If the IF flag was set before the kprobe hit, | 
|---|
| 1054 | * don't touch it: | 
|---|
| 1055 | */ | 
|---|
| 1056 | regs->flags |= kcb->kprobe_old_flags; | 
|---|
| 1057 |  | 
|---|
| 1058 | if (kcb->kprobe_status == KPROBE_REENTER) | 
|---|
| 1059 | restore_previous_kprobe(kcb); | 
|---|
| 1060 | else | 
|---|
| 1061 | reset_current_kprobe(); | 
|---|
| 1062 | } | 
|---|
| 1063 |  | 
|---|
| 1064 | return 0; | 
|---|
| 1065 | } | 
|---|
| 1066 | NOKPROBE_SYMBOL(kprobe_fault_handler); | 
|---|
| 1067 |  | 
|---|
| 1068 | int __init arch_populate_kprobe_blacklist(void) | 
|---|
| 1069 | { | 
|---|
| 1070 | return kprobe_add_area_blacklist(start: (unsigned long)__entry_text_start, | 
|---|
| 1071 | end: (unsigned long)__entry_text_end); | 
|---|
| 1072 | } | 
|---|
| 1073 |  | 
|---|
| 1074 | int __init arch_init_kprobes(void) | 
|---|
| 1075 | { | 
|---|
| 1076 | return 0; | 
|---|
| 1077 | } | 
|---|
| 1078 |  | 
|---|
/* Always returns 0; @p is not examined. */
int arch_trampoline_kprobe(struct kprobe *p)
{
	return 0;
}
| 1083 |  | 
|---|