// SPDX-License-Identifier: GPL-2.0

#include <linux/irq-entry-common.h>
#include <linux/resume_user_mode.h>
#include <linux/highmem.h>
#include <linux/jump_label.h>
#include <linux/kmsan.h>
#include <linux/livepatch.h>
#include <linux/tick.h>

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }

/**
 * exit_to_user_mode_loop - do any pending work before leaving to user space
 * @regs:	Pointer to pt_regs on entry stack
 * @ti_work:	TIF work flags as read by the caller
 */
__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
						     unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			arch_do_signal_or_restart(regs);

		if (ti_work & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption were
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();

		/* Check if any of the above work has queued a deferred wakeup */
		tick_nohz_user_enter_prepare();

		ti_work = read_thread_flags();
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}

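/*
 * Establish state (lockdep, context tracking/RCU, tracing) for an
 * interrupt entry that came from user mode. Invoked from architecture
 * specific entry code with interrupts disabled.
 */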
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
}

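/*
 * Counterpart to irqentry_enter_from_user_mode(): handle pending TIF work
 * in exit_to_user_mode_prepare() and then switch lockdep, tracing and
 * context tracking back to user mode.
 */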
noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

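/*
 * Establish state for interrupt/exception entry from either user or
 * kernel mode. The returned state records whether ct_irq_enter() was
 * invoked here (exit_rcu) so that irqentry_exit() can undo it.
 */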
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task invoke ct_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nesting
	 * interrupt from invoking ct_irq_enter(). If that nested interrupt
	 * is the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke ct_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) &&
	    (is_idle_task(current) || arch_in_rcu_eqs())) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		ct_irq_enter();
		instrumentation_begin();
		kmsan_unpoison_entry_regs(regs);
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}

/**
 * arch_irqentry_exit_need_resched - Architecture specific need resched function
 *
 * Invoked from raw_irqentry_exit_cond_resched() to check if resched is needed.
 * The default implementation returns true.
 *
 * The main purpose is to permit an architecture to avoid preemption of a
 * task from an IRQ.
 */
static inline bool arch_irqentry_exit_need_resched(void);

#ifndef arch_irqentry_exit_need_resched
static inline bool arch_irqentry_exit_need_resched(void) { return true; }
#endif

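/*
 * Reschedule on return from an interrupt that hit kernel mode when
 * nothing holds off preemption (preempt_count() is zero) and a
 * reschedule is pending.
 */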
void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched() && arch_irqentry_exit_need_resched())
			preempt_schedule_irq();
	}
}
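/*
 * With CONFIG_PREEMPT_DYNAMIC the preemption point is switched according
 * to the selected preemption model, either via a static call
 * (HAVE_PREEMPT_DYNAMIC_CALL) or a static key (HAVE_PREEMPT_DYNAMIC_KEY).
 */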
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
#endif
#endif

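/*
 * Counterpart to irqentry_enter(): restore lockdep, tracing and RCU
 * state, using state.exit_rcu to decide whether ct_irq_exit() is needed.
 */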
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare();
			instrumentation_end();
			ct_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();

		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			ct_irq_exit();
	}
}

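/*
 * Establish NMI state. The lockdep interrupt state is saved because an
 * NMI can hit while lockdep still considers interrupts enabled; it is
 * restored in irqentry_nmi_exit().
 */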
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	ct_nmi_enter();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

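/*
 * Counterpart to irqentry_nmi_enter(): leave NMI context and restore the
 * lockdep interrupt state saved on entry.
 */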
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}
	instrumentation_end();

	ct_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}