Lines Matching +full:75 +full:- +full:ec
1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * User-space Probes (UProbes) for x86
5 * Copyright (C) IBM Corporation, 2008-2011
22 /* Post-execution fixups. */
42 #define OPCODE1(insn) ((insn)->opcode.bytes[0])
43 #define OPCODE2(insn) ((insn)->opcode.bytes[1])
44 #define OPCODE3(insn) ((insn)->opcode.bytes[2])
45 #define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value)
55 * Good-instruction tables for 32-bit apps. This is non-const and volatile
60 * 6c-6f - ins,outs. SEGVs if used in userspace
61 * e4-e7 - in,out imm. SEGVs if used in userspace
62 * ec-ef - in,out acc. SEGVs if used in userspace
63 * cc - int3. SIGTRAP if used in userspace
64 * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs
66 * f1 - int1. SIGTRAP if used in userspace
67 * f4 - hlt. SEGVs if used in userspace
68 * fa - cli. SEGVs if used in userspace
69 * fb - sti. SEGVs if used in userspace
72 * 07,17,1f - pop es/ss/ds
76 * of userspace single-stepping (TF flag) is fragile.
79 * cd - int N.
81 * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
82 * Not supported since kernel's handling of userspace single-stepping
84 * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
89 /* ---------------------------------------------- */
106 /* ---------------------------------------------- */
113 /* Good-instruction tables for 64-bit apps.
116 * 06,07 - formerly push/pop es
117 * 0e - formerly push cs
118 * 16,17 - formerly push/pop ss
119 * 1e,1f - formerly push/pop ds
120 * 27,2f,37,3f - formerly daa/das/aaa/aas
121 * 60,61 - formerly pusha/popa
122 * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported)
123 * 82 - formerly redundant encoding of Group1
124 * 9a - formerly call seg:ofs
125 * ce - formerly into
126 * d4,d5 - formerly aam/aad
127 * d6 - formerly undocumented salc
128 * ea - formerly jmp seg:ofs
131 * 6c-6f - ins,outs. SEGVs if used in userspace
132 * e4-e7 - in,out imm. SEGVs if used in userspace
133 * ec-ef - in,out acc. SEGVs if used in userspace
134 * cc - int3. SIGTRAP if used in userspace
135 * f1 - int1. SIGTRAP if used in userspace
136 * f4 - hlt. SEGVs if used in userspace
137 * fa - cli. SEGVs if used in userspace
138 * fb - sti. SEGVs if used in userspace
141 * cd - int N.
143 * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
144 * Not supported since kernel's handling of userspace single-stepping
146 * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
151 /* ---------------------------------------------- */
168 /* ---------------------------------------------- */
175 /* Using this for both 64-bit and 32-bit apps.
177 * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns
178 * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group.
180 * Some are in fact non-system: xend, xtest, rdtscp, maybe more
181 * 0f 05 - syscall
182 * 0f 06 - clts (CPL0 insn)
183 * 0f 07 - sysret
184 * 0f 08 - invd (CPL0 insn)
185 * 0f 09 - wbinvd (CPL0 insn)
186 * 0f 0b - ud2
187 * 0f 30 - wrmsr (CPL0 insn) (then why rdmsr is allowed, it's also CPL0 insn?)
188 * 0f 34 - sysenter
189 * 0f 35 - sysexit
190 * 0f 37 - getsec
191 * 0f 78 - vmread (Intel VMX. CPL0 insn)
192 * 0f 79 - vmwrite (Intel VMX. CPL0 insn)
195 * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt],
197 * Why? They are all user-executable.
201 /* ---------------------------------------------- */
218 /* ---------------------------------------------- */
226 * 0f - 2-byte instructions: For many of these instructions, the validity
231 * 8f - Group 1 - only reg = 0 is OK
232 * c6-c7 - Group 11 - only reg = 0 is OK
233 * d9-df - fpu insns with some illegal encodings
234 * f2, f3 - repnz, repz prefixes. These are also the first byte for
235 * certain floating-point instructions, such as addsd.
237 * fe - Group 4 - only reg = 0 or 1 is OK
238 * ff - Group 5 - only reg = 0-6 is OK
240 * others -- Do we need to support these?
242 * 0f - (floating-point?) prefetch instructions
243 * 07, 17, 1f - pop es, pop ss, pop ds
244 * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
246 * 67 - addr16 prefix
247 * ce - into
248 * f0 - lock prefix
253 * - Where necessary, examine the modrm byte and allow only valid instructions
284 ret = insn_decode(insn, auprobe->insn, sizeof(auprobe->insn), m); in uprobe_init_insn()
286 return -ENOEXEC; in uprobe_init_insn()
289 return -ENOTSUPP; in uprobe_init_insn()
293 return -ENOTSUPP; in uprobe_init_insn()
303 if (insn->opcode.nbytes == 2) { in uprobe_init_insn()
308 return -ENOTSUPP; in uprobe_init_insn()
348 * only for native 64-bit process, the compat process still uses in arch_uprobe_trampoline()
352 *psize = uretprobe_trampoline_end - uretprobe_trampoline_entry; in arch_uprobe_trampoline()
364 return tramp + (uretprobe_syscall_check - uretprobe_trampoline_entry); in trampoline_check_ip()
372 if (regs->ip != trampoline_check_ip()) in SYSCALL_DEFINE0()
375 err = copy_from_user(r11_cx_ax, (void __user *)regs->sp, sizeof(r11_cx_ax)); in SYSCALL_DEFINE0()
380 regs->r11 = r11_cx_ax[0]; in SYSCALL_DEFINE0()
381 regs->cx = r11_cx_ax[1]; in SYSCALL_DEFINE0()
382 regs->ax = r11_cx_ax[2]; in SYSCALL_DEFINE0()
383 regs->sp += sizeof(r11_cx_ax); in SYSCALL_DEFINE0()
384 regs->orig_ax = -1; in SYSCALL_DEFINE0()
386 ip = regs->ip; in SYSCALL_DEFINE0()
387 sp = regs->sp; in SYSCALL_DEFINE0()
397 if (regs->sp != sp || shstk_is_enabled()) in SYSCALL_DEFINE0()
398 return regs->ax; in SYSCALL_DEFINE0()
399 regs->sp -= sizeof(r11_cx_ax); in SYSCALL_DEFINE0()
402 r11_cx_ax[0] = regs->r11; in SYSCALL_DEFINE0()
403 r11_cx_ax[1] = regs->cx; in SYSCALL_DEFINE0()
410 r11_cx_ax[2] = regs->ip; in SYSCALL_DEFINE0()
411 regs->ip = ip; in SYSCALL_DEFINE0()
413 err = copy_to_user((void __user *)regs->sp, r11_cx_ax, sizeof(r11_cx_ax)); in SYSCALL_DEFINE0()
418 regs->r11 = regs->flags; in SYSCALL_DEFINE0()
419 regs->cx = regs->ip; in SYSCALL_DEFINE0()
421 return regs->ax; in SYSCALL_DEFINE0()
425 return -1; in SYSCALL_DEFINE0()
429 * If arch_uprobe->insn doesn't use rip-relative addressing, return
432 * defparam->fixups accordingly. (The contents of the scratch register
433 * will be saved before we single-step the modified instruction,
436 * We do this because a rip-relative instruction can access only a
437 * relatively small area (+/- 2 GB from the instruction), and the XOL
443 * Some useful facts about rip-relative instructions:
445 * - There's always a modrm byte with bit layout "00 reg 101".
446 * - There's never a SIB byte.
447 * - The displacement is always 4 bytes.
448 * - REX.B=1 bit in REX prefix, which normally extends r/m field,
449 * has no effect on rip-relative mode. It doesn't make modrm byte
466 if (insn->rex_prefix.nbytes) { in riprel_analyze()
467 cursor = auprobe->insn + insn_offset_rex_prefix(insn); in riprel_analyze()
475 if (insn->vex_prefix.nbytes >= 3) { in riprel_analyze()
481 * Setting EVEX.x since (in non-SIB encoding) EVEX.x in riprel_analyze()
483 * For VEX3-encoded insns, VEX3.x value has no effect in in riprel_analyze()
484 * non-SIB encoding, the change is superfluous but harmless. in riprel_analyze()
486 cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1; in riprel_analyze()
491 * Convert from rip-relative addressing to register-relative addressing in riprel_analyze()
503 * First appeared in Haswell (BMI2 insn). It is vex-encoded. in riprel_analyze()
512 * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm. in riprel_analyze()
513 * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi). in riprel_analyze()
514 * AMD says it has no 3-operand form (vex.vvvv must be 1111) in riprel_analyze()
524 * BP is stack-segment based (may be a problem?). in riprel_analyze()
525 * AX, DX, CX are off-limits (many implicit users). in riprel_analyze()
526 * SP is unusable (it's stack pointer - think about "pop mem"; in riprel_analyze()
527 * also, rsp+disp32 needs sib encoding -> insn length change). in riprel_analyze()
532 if (insn->vex_prefix.nbytes) in riprel_analyze()
533 reg2 = insn->vex_prefix.bytes[2]; in riprel_analyze()
537 * vex.vvvv field is in bits 6-3, bits are inverted. in riprel_analyze()
538 * But in 32-bit mode, high-order bit may be ignored. in riprel_analyze()
539 * Therefore, let's consider only 3 low-order bits. in riprel_analyze()
550 auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI; in riprel_analyze()
553 auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI; in riprel_analyze()
557 auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX; in riprel_analyze()
564 cursor = auprobe->insn + insn_offset_modrm(insn); in riprel_analyze()
576 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI) in scratch_reg()
577 return &regs->si; in scratch_reg()
578 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI) in scratch_reg()
579 return &regs->di; in scratch_reg()
580 return &regs->bx; in scratch_reg()
584 * If we're emulating a rip-relative instruction, save the contents
589 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { in riprel_pre_xol()
590 struct uprobe_task *utask = current->utask; in riprel_pre_xol()
593 utask->autask.saved_scratch_register = *sr; in riprel_pre_xol()
594 *sr = utask->vaddr + auprobe->defparam.ilen; in riprel_pre_xol()
600 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { in riprel_post_xol()
601 struct uprobe_task *utask = current->utask; in riprel_post_xol()
604 *sr = utask->autask.saved_scratch_register; in riprel_post_xol()
607 #else /* 32-bit: */
609 * No RIP-relative addressing on 32-bit
645 unsigned long new_sp = regs->sp - sizeof_long(regs); in emulate_push_stack()
648 return -EFAULT; in emulate_push_stack()
650 regs->sp = new_sp; in emulate_push_stack()
661 * If the single-stepped instruction was a call, the return address that
665 * If the original instruction was a rip-relative instruction such as
667 * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
673 struct uprobe_task *utask = current->utask; in default_post_xol_op()
676 if (auprobe->defparam.fixups & UPROBE_FIX_IP) { in default_post_xol_op()
677 long correction = utask->vaddr - utask->xol_vaddr; in default_post_xol_op()
678 regs->ip += correction; in default_post_xol_op()
679 } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { in default_post_xol_op()
680 regs->sp += sizeof_long(regs); /* Pop incorrect return address */ in default_post_xol_op()
681 if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen)) in default_post_xol_op()
682 return -ERESTART; in default_post_xol_op()
685 if (auprobe->defparam.fixups & UPROBE_FIX_SETF) in default_post_xol_op()
686 utask->autask.saved_tf = true; in default_post_xol_op()
704 return auprobe->branch.opc1 == 0xe8; in branch_is_call()
710 COND(74, 75, XF(ZF)) \
738 unsigned long flags = regs->flags; in check_jmp_cond()
740 switch (auprobe->branch.opc1) { in check_jmp_cond()
757 unsigned long new_ip = regs->ip += auprobe->branch.ilen; in branch_emulate_op()
758 unsigned long offs = (long)auprobe->branch.offs; in branch_emulate_op()
763 * branch_clear_offset) insn out-of-line. In the likely case in branch_emulate_op()
768 * But there is corner case, see the comment in ->post_xol(). in branch_emulate_op()
776 regs->ip = new_ip + offs; in branch_emulate_op()
782 unsigned long *src_ptr = (void *)regs + auprobe->push.reg_offset; in push_emulate_op()
786 regs->ip += auprobe->push.ilen; in push_emulate_op()
796 * "call" insn was executed out-of-line. Just restore ->sp and restart. in branch_post_xol_op()
797 * We could also restore ->ip and try to call branch_emulate_op() again. in branch_post_xol_op()
799 regs->sp += sizeof_long(regs); in branch_post_xol_op()
800 return -ERESTART; in branch_post_xol_op()
807 * out-of-line if ->emulate() fails. We only need this to generate in branch_clear_offset()
811 * But see the comment in ->post_xol(), in the unlikely case it can in branch_clear_offset()
812 * succeed. So we need to ensure that the new ->ip can not fall into in branch_clear_offset()
813 * the non-canonical area and trigger #GP. in branch_clear_offset()
816 * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte in branch_clear_offset()
817 * of ->insn[] for set_orig_insn(). in branch_clear_offset()
819 memset(auprobe->insn + insn_offset_immediate(insn), in branch_clear_offset()
820 0, insn->immediate.nbytes); in branch_clear_offset()
832 /* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
851 if (insn->opcode.nbytes != 2) in branch_setup_xol_ops()
852 return -ENOSYS; in branch_setup_xol_ops()
854 * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches in branch_setup_xol_ops()
857 opc1 = OPCODE2(insn) - 0x10; in branch_setup_xol_ops()
861 return -ENOSYS; in branch_setup_xol_ops()
865 * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. in branch_setup_xol_ops()
866 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. in branch_setup_xol_ops()
871 return -ENOTSUPP; in branch_setup_xol_ops()
875 auprobe->branch.opc1 = opc1; in branch_setup_xol_ops()
876 auprobe->branch.ilen = insn->length; in branch_setup_xol_ops()
877 auprobe->branch.offs = insn->immediate.value; in branch_setup_xol_ops()
879 auprobe->ops = &branch_xol_ops; in branch_setup_xol_ops()
883 /* Returns -ENOSYS if push_xol_ops doesn't handle this insn */
889 return -ENOSYS; in push_setup_xol_ops()
891 if (insn->length > 2) in push_setup_xol_ops()
892 return -ENOSYS; in push_setup_xol_ops()
893 if (insn->length == 2) { in push_setup_xol_ops()
896 if (insn->rex_prefix.nbytes != 1 || in push_setup_xol_ops()
897 insn->rex_prefix.bytes[0] != 0x41) in push_setup_xol_ops()
898 return -ENOSYS; in push_setup_xol_ops()
927 return -ENOSYS; in push_setup_xol_ops()
958 auprobe->push.reg_offset = reg_offset; in push_setup_xol_ops()
959 auprobe->push.ilen = insn->length; in push_setup_xol_ops()
960 auprobe->ops = &push_xol_ops; in push_setup_xol_ops()
965 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
969 * Return 0 on success or a -ve number on error.
982 if (ret != -ENOSYS) in arch_uprobe_analyze_insn()
986 if (ret != -ENOSYS) in arch_uprobe_analyze_insn()
991 * and annotate defparam->fixups accordingly. in arch_uprobe_analyze_insn()
995 auprobe->defparam.fixups |= UPROBE_FIX_SETF; in arch_uprobe_analyze_insn()
997 case 0xc3: /* ret or lret -- ip is correct */ in arch_uprobe_analyze_insn()
1001 case 0xea: /* jmp absolute -- ip is correct */ in arch_uprobe_analyze_insn()
1004 case 0x9a: /* call absolute - Fix return addr, not ip */ in arch_uprobe_analyze_insn()
1021 auprobe->defparam.ilen = insn.length; in arch_uprobe_analyze_insn()
1022 auprobe->defparam.fixups |= fix_ip_or_call; in arch_uprobe_analyze_insn()
1024 auprobe->ops = &default_xol_ops; in arch_uprobe_analyze_insn()
1029 * arch_uprobe_pre_xol - prepare to execute out of line.
1035 struct uprobe_task *utask = current->utask; in arch_uprobe_pre_xol()
1037 if (auprobe->ops->pre_xol) { in arch_uprobe_pre_xol()
1038 int err = auprobe->ops->pre_xol(auprobe, regs); in arch_uprobe_pre_xol()
1043 regs->ip = utask->xol_vaddr; in arch_uprobe_pre_xol()
1044 utask->autask.saved_trap_nr = current->thread.trap_nr; in arch_uprobe_pre_xol()
1045 current->thread.trap_nr = UPROBE_TRAP_NR; in arch_uprobe_pre_xol()
1047 utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF); in arch_uprobe_pre_xol()
1048 regs->flags |= X86_EFLAGS_TF; in arch_uprobe_pre_xol()
1059 * like do_page_fault/do_trap/etc sets thread.trap_nr != -1.
1062 * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to
1063 * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol().
1067 if (t->thread.trap_nr != UPROBE_TRAP_NR) in arch_uprobe_xol_was_trapped()
1074 * Called after single-stepping. To avoid the SMP problems that can
1076 * single-step, we single-stepped a copy of the instruction.
1078 * This function prepares to resume execution after the single-step.
1082 struct uprobe_task *utask = current->utask; in arch_uprobe_post_xol()
1083 bool send_sigtrap = utask->autask.saved_tf; in arch_uprobe_post_xol()
1086 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); in arch_uprobe_post_xol()
1087 current->thread.trap_nr = utask->autask.saved_trap_nr; in arch_uprobe_post_xol()
1089 if (auprobe->ops->post_xol) { in arch_uprobe_post_xol()
1090 err = auprobe->ops->post_xol(auprobe, regs); in arch_uprobe_post_xol()
1093 * Restore ->ip for restart or post mortem analysis. in arch_uprobe_post_xol()
1094 * ->post_xol() must not return -ERESTART unless this in arch_uprobe_post_xol()
1097 regs->ip = utask->vaddr; in arch_uprobe_post_xol()
1098 if (err == -ERESTART) in arch_uprobe_post_xol()
1111 if (!utask->autask.saved_tf) in arch_uprobe_post_xol()
1112 regs->flags &= ~X86_EFLAGS_TF; in arch_uprobe_post_xol()
1121 struct pt_regs *regs = args->regs; in arch_uprobe_exception_notify()
1155 struct uprobe_task *utask = current->utask; in arch_uprobe_abort_xol()
1157 if (auprobe->ops->abort) in arch_uprobe_abort_xol()
1158 auprobe->ops->abort(auprobe, regs); in arch_uprobe_abort_xol()
1160 current->thread.trap_nr = utask->autask.saved_trap_nr; in arch_uprobe_abort_xol()
1161 regs->ip = utask->vaddr; in arch_uprobe_abort_xol()
1163 if (!utask->autask.saved_tf) in arch_uprobe_abort_xol()
1164 regs->flags &= ~X86_EFLAGS_TF; in arch_uprobe_abort_xol()
1169 if (auprobe->ops->emulate) in __skip_sstep()
1170 return auprobe->ops->emulate(auprobe, regs); in __skip_sstep()
1177 if (ret && (regs->flags & X86_EFLAGS_TF)) in arch_uprobe_skip_sstep()
1186 unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ in arch_uretprobe_hijack_return_addr()
1188 if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize)) in arch_uretprobe_hijack_return_addr()
1189 return -1; in arch_uretprobe_hijack_return_addr()
1195 nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); in arch_uretprobe_hijack_return_addr()
1199 return -1; in arch_uretprobe_hijack_return_addr()
1206 current->pid, regs->sp, regs->ip); in arch_uretprobe_hijack_return_addr()
1211 return -1; in arch_uretprobe_hijack_return_addr()
1218 return regs->sp < ret->stack; in arch_uretprobe_is_alive()
1220 return regs->sp <= ret->stack; in arch_uretprobe_is_alive()